Made everything build again after deleting the unused MultiNetworks functions, including the related entries in SimpleNetworkBuilder;
deleted PairNetworkNode; renamed EsotericActions.cpp to SpecialPurposeActions.cpp.
This commit is contained in:
Parent
18b7b36cca
Commit
a4a20183aa
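
For orientation: the five commands removed by this commit ("testunroll", "trainEncoderDecoder", "testEncoderDecoder", "trainBidirectionEncoderDecoder", "beamSearch") were reached through the string-dispatch chain in DoCommands(), which the hunks below trim. A minimal sketch of that dispatch shape (simplified; DispatchOneAction is a hypothetical helper, not the actual structure of CNTK.cpp):

// Sketch only -- the real DoCommands() iterates over a configured command list
// and routes each action name through an if/else-if chain like this one.
template <class ElemType>
static void DispatchOneAction(const std::string& action, const ConfigParameters& commandParams)
{
    if (action == "train")
        DoTrain<ElemType>(commandParams);
    else if (action == "test")
        DoEval<ElemType>(commandParams);
    // Removed by this commit: "testunroll" -> DoEvalUnroll,
    // "trainEncoderDecoder" -> DoEncoderDecoder,
    // "testEncoderDecoder" -> DoEvalEncodingBeamSearchDecoding,
    // "trainBidirectionEncoderDecoder" -> DoBidirectionEncoderDecoder,
    // "beamSearch" -> DoBeamSearchDecoding.
    else
        RuntimeError("unknown action: %s", action.c_str());
}
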
@@ -43,16 +43,6 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config);
 template <typename ElemType>
 void DoTopologyPlot(const ConfigParameters& config);
 
-// deprecated (EsotericActions.cp)
+// special purpose (EsotericActions.cp)
 template <typename ElemType>
 void DoConvertFromDbn(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalUnroll(const ConfigParameters& config);
-template <typename ElemType>
-void DoEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoBidirectionEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config);
-template <typename ElemType>
-void DoBeamSearchDecoding(const ConfigParameters& config);

@@ -170,7 +170,7 @@
       <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
     </ClCompile>
     <ClCompile Include="..\Common\TimerUtility.cpp" />
-    <ClCompile Include="EsotericActions.cpp" />
+    <ClCompile Include="SpecialPurposeActions.cpp" />
     <ClCompile Include="EvalActions.cpp" />
     <ClCompile Include="OtherActions.cpp" />
     <ClCompile Include="TrainActions.cpp" />

@@ -19,7 +19,7 @@
     <ClCompile Include="OtherActions.cpp">
       <Filter>Actions</Filter>
     </ClCompile>
-    <ClCompile Include="EsotericActions.cpp">
+    <ClCompile Include="SpecialPurposeActions.cpp">
       <Filter>Actions</Filter>
     </ClCompile>
   </ItemGroup>

@@ -1,424 +0,0 @@
-//
-// Copyright (c) Microsoft. All rights reserved.
-// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
-//
-// EsotericActions.cpp -- CNTK actions that are deprecated
-//
-
-#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
-
-#include "stdafx.h"
-#include "Basics.h"
-#include "Actions.h"
-#include "ComputationNetwork.h"
-#include "ComputationNode.h"
-#include "DataReader.h"
-#include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
-#include "Config.h"
-#include "MultiNetworksSGD.h"
-#include "SimpleEvaluator.h"
-#include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
-#include "BestGpu.h"
-#include "ScriptableObjects.h"
-#include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
-
-#include <string>
-#include <chrono>
-#include <algorithm>
-#include <vector>
-#include <iostream>
-#include <queue>
-#include <set>
-#include <memory>
-
-#ifndef let
-#define let const auto
-#endif
-
-using namespace std;
-using namespace Microsoft::MSR;
-using namespace Microsoft::MSR::CNTK;
-
-// ===========================================================================
-// DoConvertFromDbn() - implements CNTK "convertdbn" command
-// ===========================================================================
-
-template <typename ElemType>
-void DoConvertFromDbn(const ConfigParameters& config)
-{
-    wstring modelPath = config(L"modelPath");
-    wstring dbnModelPath = config(L"dbnModelPath");
-
-    auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
-    ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
-    net->Save(modelPath);
-}
-
-template void DoConvertFromDbn<float>(const ConfigParameters& config);
-template void DoConvertFromDbn<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEvalUnroll() - implements CNTK "testunroll" command
-// ===========================================================================
-
-// Special early implementation of RNNs by emulating them as a DNN.
-// The code is very restricted to simple RNNs.
-// The idea can be used for more complicated network but need to know which nodes are stateful or time-dependent so that unroll is done in a correct way to represent recurrent networks.
-// TODO: can probably be removed.
-template <typename ElemType>
-void DoEvalUnroll(const ConfigParameters& config)
-{
-    //test
-    ConfigParameters readerConfig(config(L"reader"));
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> testDataReader(readerConfig);
-
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-    wstring modelPath = config(L"modelPath");
-    intargvector mbSize = minibatchSize;
-    wstring path2EvalResults = config(L"path2EvalResults", L"");
-
-    auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
-
-    MultiNetworksEvaluator<ElemType> eval(net);
-    double evalEntropy;
-    eval.EvaluateUnroll(&testDataReader, mbSize[0], evalEntropy, path2EvalResults == L"" ? nullptr : path2EvalResults.c_str(), epochSize);
-}
-
-template void DoEvalUnroll<float>(const ConfigParameters& config);
-template void DoEvalUnroll<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEncoderDecoder() - implements CNTK "trainEncoderDecoder" command
-// ===========================================================================
-
-/**
-This implements sequence to sequence translation paper in
-http://arxiv.org/pdf/1409.3215.pdf
-
-*/
-template <typename ElemType>
-void DoEncoderDecoder(const ConfigParameters& config)
-{
-    vector<IComputationNetBuilder<ElemType>*> netBuilders;
-    vector<IDataReader<ElemType>*> trainDataReader;
-    vector<IDataReader<ElemType>*> validationDataReader;
-
-    ConfigParameters configSGD = config(L"SGD");
-    bool makeMode = config(L"makeMode", "true");
-    IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* decoderNetBuilder = NULL;
-
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
-
-    ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
-    DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
-
-    ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
-    DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
-
-    if (config.Exists("EncoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
-        encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need encoder network");
-    }
-
-    if (config.Exists("DecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
-        decoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    MultiNetworksSGD<ElemType> sgd(configSGD);
-
-    sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
-
-    netBuilders.push_back(encoderNetBuilder);
-    netBuilders.push_back(decoderNetBuilder);
-    trainDataReader.push_back(encoderDataReader);
-    trainDataReader.push_back(decoderDataReader);
-    validationDataReader.push_back(cvEncoderDataReader);
-    validationDataReader.push_back(cvDecoderDataReader);
-
-    sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
-
-    delete encoderDataReader;
-    delete decoderDataReader;
-    delete cvEncoderDataReader;
-    delete cvDecoderDataReader;
-}
-
-template void DoEncoderDecoder<float>(const ConfigParameters& config);
-template void DoEncoderDecoder<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoBidirectionEncoderDecoder() - implements CNTK "trainBidirectionEncoderDecoder" command
-// ===========================================================================
-
-/**
-DoBidirecionEncoderDecoder
-*/
-template <typename ElemType>
-void DoBidirectionEncoderDecoder(const ConfigParameters& config)
-{
-
-    ConfigParameters configSGD = config(L"SGD");
-    bool makeMode = config(L"makeMode", "true");
-    IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* forwardDecoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* backwardDecoderNetBuilder = NULL;
-    vector<IComputationNetBuilder<ElemType>*> netBuilders;
-    vector<IDataReader<ElemType>*> trainDataReader;
-    vector<IDataReader<ElemType>*> validationDataReader;
-
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
-
-    ConfigParameters backwardDecoderReaderConfig = config(L"backwardDecoderReader");
-    DataReader<ElemType>* backwardDecoderDataReader = new DataReader<ElemType>(backwardDecoderReaderConfig);
-
-    ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
-    DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
-
-    ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
-    DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
-
-    ConfigParameters cvBackwardDecoderReaderConfig = config(L"BackwardDecoderCVReader");
-    DataReader<ElemType>* cvBackwardDecoderDataReader = new DataReader<ElemType>(cvBackwardDecoderReaderConfig);
-
-    if (config.Exists("EncoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
-        encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-        LogicError("Need encoder network");
-
-    if (config.Exists("DecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
-        forwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    if (config.Exists("BackwardDecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"BackwardDecoderNetworkBuilder");
-        backwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    MultiNetworksSGD<ElemType> sgd(configSGD);
-
-    sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
-
-    netBuilders.push_back(encoderNetBuilder);
-    netBuilders.push_back(forwardDecoderNetBuilder);
-    netBuilders.push_back(backwardDecoderNetBuilder);
-    trainDataReader.push_back(encoderDataReader);
-    trainDataReader.push_back(decoderDataReader);
-    trainDataReader.push_back(backwardDecoderDataReader);
-    validationDataReader.push_back(cvEncoderDataReader);
-    validationDataReader.push_back(cvDecoderDataReader);
-    validationDataReader.push_back(cvBackwardDecoderDataReader);
-
-    sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
-
-    delete encoderDataReader;
-    delete decoderDataReader;
-    delete cvEncoderDataReader;
-    delete cvDecoderDataReader;
-    delete backwardDecoderDataReader;
-    delete cvBackwardDecoderDataReader;
-}
-
-template void DoBidirectionEncoderDecoder<float>(const ConfigParameters& config);
-template void DoBidirectionEncoderDecoder<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEvalEncodingBeamSearchDecoding() - implements CNTK "testEncoderDecoder" command
-// ===========================================================================
-
-/**
-Originally, this is for testing models trained using the sequence to sequence translation method below
-http://arxiv.org/pdf/1409.3215.pdf
-Later on, it is extended to be more general to include a sequence of network operations.
-*/
-template <typename ElemType>
-void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config)
-{
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-
-    vector<IDataReader<ElemType>*> readers;
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> encoderReader(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    decoderReaderConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> decoderReader(decoderReaderConfig);
-
-    readers.push_back(&encoderReader);
-    readers.push_back(&decoderReader);
-
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-
-    wstring encoderModelPath = config(L"encoderModelPath");
-    wstring decoderModelPath = config(L"decoderModelPath");
-
-    intargvector mbSize = minibatchSize;
-
-    int traceLevel = config(L"traceLevel", "0");
-    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
-
-    vector<ComputationNetworkPtr> nets;
-    auto encoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, encoderModelPath, FileOptions::fileOptionsBinary, true);
-
-    auto decoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, decoderModelPath, FileOptions::fileOptionsBinary, false, encoderNet.get());
-
-    nets.push_back(encoderNet);
-    nets.push_back(decoderNet);
-    ConfigArray evalNodeNames = config(L"evalNodeNames");
-    vector<wstring> evalNodeNamesVector;
-    for (int i = 0; i < evalNodeNames.size(); ++i)
-    {
-        evalNodeNamesVector.push_back(evalNodeNames[i]);
-    }
-
-    ConfigArray outputNodeNames = config(L"outputNodeNames");
-    vector<wstring> outputNodeNamesVector;
-    for (int i = 0; i < outputNodeNames.size(); ++i)
-    {
-        outputNodeNamesVector.push_back(outputNodeNames[i]);
-    }
-
-    ElemType beamWidth = config(L"beamWidth", "1");
-
-    ConfigParameters writerConfig = config(L"writer");
-    DataWriter<ElemType> testDataWriter(writerConfig);
-
-    MultiNetworksEvaluator<ElemType> eval(decoderNet, numMBsToShowResult, traceLevel);
-    eval.InitTrainEncoderDecoderWithHiddenStates(config);
-
-    eval.EncodingEvaluateDecodingBeamSearch(nets, readers,
-                                            testDataWriter, evalNodeNamesVector,
-                                            outputNodeNamesVector,
-                                            mbSize[0], beamWidth, epochSize);
-}
-
-template void DoEvalEncodingBeamSearchDecoding<float>(const ConfigParameters& config);
-template void DoEvalEncodingBeamSearchDecoding<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoBeamSearchDecoding() - implements CNTK "beamSearch" command
-// ===========================================================================
-
-template <typename ElemType>
-static void DoEvalBeamSearch(const ConfigParameters& config, IDataReader<ElemType>& reader)
-{
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-    wstring modelPath = config(L"modelPath");
-    intargvector mbSize = minibatchSize;
-
-    int traceLevel = config(L"traceLevel", "0");
-    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
-
-    auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
-
-    ConfigArray evalNodeNames = config(L"evalNodeNames");
-    vector<wstring> evalNodeNamesVector;
-    for (int i = 0; i < evalNodeNames.size(); ++i)
-    {
-        evalNodeNamesVector.push_back(evalNodeNames[i]);
-    }
-
-    ConfigArray outputNodeNames = config(L"outputNodeNames");
-    vector<wstring> outputNodeNamesVector;
-    for (int i = 0; i < outputNodeNames.size(); ++i)
-    {
-        outputNodeNamesVector.push_back(outputNodeNames[i]);
-    }
-
-    ElemType beamWidth = config(L"beamWidth", "1");
-
-    ConfigParameters writerConfig = config(L"writer");
-    DataWriter<ElemType> testDataWriter(writerConfig);
-
-    MultiNetworksEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
-    eval.BeamSearch(&reader, testDataWriter, evalNodeNamesVector, outputNodeNamesVector, mbSize[0], beamWidth, epochSize);
-}
-
-/**
-This is beam search decoder.
-
-Developed by Kaisheng Yao.
-
-It is used in the following work:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion" in Interspeech 2015
-*/
-template <typename ElemType>
-void DoBeamSearchDecoding(const ConfigParameters& config)
-{
-    //test
-    ConfigParameters readerConfig = config(L"reader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> testDataReader(readerConfig);
-
-    DoEvalBeamSearch(config, testDataReader);
-}
-
-template void DoBeamSearchDecoding<float>(const ConfigParameters& config);
-template void DoBeamSearchDecoding<double>(const ConfigParameters& config);

@@ -14,20 +14,12 @@
 #include "ComputationNode.h"
 #include "DataReader.h"
 #include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
 
 #include <string>
 #include <chrono>

@@ -12,22 +12,9 @@
 #include "Actions.h"
 #include "ComputationNetwork.h"
 #include "ComputationNode.h"
-#include "DataReader.h"
-#include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
-#include "SimpleEvaluator.h"
-#include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
-#include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
 
 #include <string>
 #include <chrono>

@@ -0,0 +1,54 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// EsotericActions.cpp -- CNTK actions that are deprecated
+//
+
+#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
+
+#include "stdafx.h"
+#include "Basics.h"
+#include "Actions.h"
+#include "ComputationNetwork.h"
+#include "ComputationNode.h"
+#include "DataReader.h"
+#include "DataWriter.h"
+#include "SimpleNetworkBuilder.h"
+#include "Config.h"
+#include "ScriptableObjects.h"
+
+#include <string>
+#include <chrono>
+#include <algorithm>
+#include <vector>
+#include <iostream>
+#include <queue>
+#include <set>
+#include <memory>
+
+#ifndef let
+#define let const auto
+#endif
+
+using namespace std;
+using namespace Microsoft::MSR;
+using namespace Microsoft::MSR::CNTK;
+
+// ===========================================================================
+// DoConvertFromDbn() - implements CNTK "convertdbn" command
+// ===========================================================================
+
+template <typename ElemType>
+void DoConvertFromDbn(const ConfigParameters& config)
+{
+    wstring modelPath = config(L"modelPath");
+    wstring dbnModelPath = config(L"dbnModelPath");
+
+    auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
+    ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
+    net->Save(modelPath);
+}
+
+template void DoConvertFromDbn<float>(const ConfigParameters& config);
+template void DoConvertFromDbn<double>(const ConfigParameters& config);

@@ -20,10 +20,8 @@
 #include "ModelEditLanguage.h"
 #include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"

@@ -50,76 +48,15 @@ using namespace Microsoft::MSR::CNTK;
 // DoTrain() - implements CNTK "train" command
 // ===========================================================================
 
-template <class ElemType>
-class BrainScriptNetworkBuilder : public IComputationNetBuilder<ElemType>
-{
-    typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
-    ComputationNetworkPtr m_net;
-    ScriptableObjects::ConfigLambdaPtr m_createNetworkFn;
-    DEVICEID_TYPE m_deviceId;
-
-public:
-    // the constructor remembers the config lambda
-    // TODO: Really this should just take the lambda itself, or rather, this class should just be replaced by a lambda. But we need the IConfigRecord for templates to be compile-compatible with old CNTK config.
-    BrainScriptNetworkBuilder(const ScriptableObjects::IConfigRecord& config)
-    {
-        m_deviceId = config[L"deviceId"]; // TODO: only needed for LoadNetworkFromFile() which should go away anyway
-        m_createNetworkFn = config[L"createNetwork"].AsPtr<ScriptableObjects::ConfigLambda>();
-    }
-    // not supported for old CNTK
-    BrainScriptNetworkBuilder(const ConfigParameters& config)
-    {
-        NOT_IMPLEMENTED;
-    }
-
-    // build a ComputationNetwork from description language
-    virtual /*IComputationNetBuilder::*/ ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
-    {
-        vector<ScriptableObjects::ConfigValuePtr> args; // this lambda has no arguments
-        ScriptableObjects::ConfigLambda::NamedParams namedArgs;
-        let netValue = m_createNetworkFn->Apply(move(args), move(namedArgs), L"BuildNetworkFromDescription");
-        m_net = netValue.AsPtr<ComputationNetwork>();
-        if (m_net->GetDeviceId() < 0)
-            fprintf(stderr, "BrainScriptNetworkBuilder using CPU\n");
-        else
-            fprintf(stderr, "BrainScriptNetworkBuilder using GPU %d\n", (int) m_net->GetDeviceId());
-        return m_net;
-    }
-
-    // load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
-    // TODO: This does not belong into NetworkBuilder, since the code is the same for all. Just create the network and load the darn thing.
-    virtual /*IComputationNetBuilder::*/ ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
-                                                                                 bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr) override
-    {
-        if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load --TODO: why all these options?
-        {
-            auto net = make_shared<ComputationNetwork>(m_deviceId);
-            net->Load<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
-            m_net = net;
-        }
-        m_net->ResetEvalTimeStamps();
-        return m_net.get();
-    }
-};
-
 // TODO: decide where these should go. Also, do we need three variables?
 extern wstring standardFunctions;
 extern wstring commonMacros;
 extern wstring computationNodes;
 
 // helper that returns 'float' or 'double' depending on ElemType
-template <class ElemType>
-static const wchar_t* ElemTypeName();
-template <>
-/*static*/ const wchar_t* ElemTypeName<float>()
-{
-    return L"float";
-}
-template <>
-/*static*/ const wchar_t* ElemTypeName<double>()
-{
-    return L"double";
-}
+template <class ElemType> static const wchar_t* ElemTypeName();
+template <> /*static*/ const wchar_t* ElemTypeName<float>()  { return L"float"; }
+template <> /*static*/ const wchar_t* ElemTypeName<double>() { return L"double"; }
 
 function<ComputationNetworkPtr(DEVICEID_TYPE)> GetCreateNetworkFn(const ScriptableObjects::IConfigRecord& config)
 {

@@ -75,51 +75,50 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
     QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
     // BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
     BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
     BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
     QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
     //BinaryStandardNode(CosDistanceWithNegativeSamplesNode)
     UnaryStandardNode(Cosine, x)
     BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
     BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
     BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
     UnaryStandardNode(Dropout, activationVectorSequence)
     //BinaryStandardNode(DummyCriterionNode)
     BinaryStandardNode(ElementTimes, aMatrix, anotherMatrix)
     BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
     UnaryStandardNode(Exp, x)
     QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
     UnaryStandardNode(InvStdDev, dataVectorSequence)
     BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
     //BinaryStandardNode(LSTMNode)
     UnaryStandardNode(Log, x)
     UnaryStandardNode(LogSoftmax, z)
     //BinaryStandardNode(LookupTableNode)
     UnaryStandardNode(MatrixL1Reg, matrix)
     UnaryStandardNode(MatrixL2Reg, matrix)
     // BUGBUG: CNTKBook also mentions L1Norm and L2Norm
     UnaryStandardNode(Mean, dataVectorSequence)
     BinaryStandardNode(Minus, leftMatrix, rightMatrix)
     UnaryStandardNode(Negate, input)
     //BinaryStandardNode(NoiseContrastiveEstimationNode)
-    //BinaryStandardNode(PairNetworkNode)
     //BinaryStandardNode(ParallelNode)
     TernaryStandardNode(PerDimMeanVarDeNormalization, dataVectorSequence, meanVector, invStdDevVector) // TODO: correct?
     TernaryStandardNode(PerDimMeanVarNormalization, dataVectorSequence, meanVector, invStdDevVector)
     BinaryStandardNode(Plus, leftMatrix, rightMatrix)
     UnaryStandardNode(RectifiedLinear, z)
     //BinaryStandardNode(RowElementTimesNode)
     BinaryStandardNode(Scale, scalarScalingFactor, matrix)
     //BinaryStandardNode(SequenceDecoderNode)
     UnaryStandardNode(Sigmoid, z)
     UnaryStandardNode(Softmax, z)
     UnaryStandardNode(Hardmax, z)
     BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
     //BinaryStandardNode(StrideTimesNode)
     //BinaryStandardNode(SumColumnElementsNode)
     UnaryStandardNode(SumElements, matrix)
     UnaryStandardNode(Tanh, z)
     UnaryStandardNode(TimeReverse, vectorSequence)
     BinaryStandardNode(Times, leftMatrix, rightMatrix)
     UnaryStandardNode(Transpose, matrix)
     //BinaryStandardNode(TransposeTimesNode)
     ;

@@ -23,10 +23,8 @@
 #include "SGD.h"
 #include "MPIWrapper.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ProgressTracing.h"
 #include "fileutil.h"

@@ -240,10 +238,6 @@ void DoCommands(const ConfigParameters& config)
             {
                 DoEval<ElemType>(commandParams);
             }
-            else if (action[j] == "testunroll")
-            {
-                DoEvalUnroll<ElemType>(commandParams);
-            }
             else if (action[j] == "edit")
             {
                 DoEdit<ElemType>(commandParams);

@@ -284,22 +278,6 @@ void DoCommands(const ConfigParameters& config)
             {
                 DoParameterSVD<ElemType>(commandParams);
             }
-            else if (action[j] == "trainEncoderDecoder")
-            {
-                DoEncoderDecoder<ElemType>(commandParams);
-            }
-            else if (action[j] == "testEncoderDecoder")
-            {
-                DoEvalEncodingBeamSearchDecoding<ElemType>(commandParams);
-            }
-            else if (action[j] == "trainBidirectionEncoderDecoder")
-            {
-                DoBidirectionEncoderDecoder<ElemType>(commandParams);
-            }
-            else if (action[j] == "beamSearch")
-            {
-                DoBeamSearchDecoding<ElemType>(commandParams);
-            }
             else
             {
                 RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());

@@ -280,8 +280,6 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
         ret = true;
     else if (EqualInsensitive(nodeType, OperationNameOf(LSTMNode), L"LSTM"))
         ret = true;
-    else if (EqualInsensitive(nodeType, OperationNameOf(PairNetworkNode), L"PairNetwork"))
-        ret = true;
     else if (EqualInsensitive(nodeType, OperationNameOf(StrideTimesNode), L"StrideTimes"))
         ret = true;
     else if (EqualInsensitive(nodeType, OperationNameOf(BatchNormalizationNode)))

@@ -59,9 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription
     case RCRF:
         net = BuildSeqTrnLSTMNetworkFromDescription();
         break;
-    case LSTMENCODER:
-        net = BuildLSTMEncoderNetworkFromDescription();
-        break;
     case UNIDIRECTIONALLSTM:
         net = BuildUnidirectionalLSTMNetworksFromDescription();
         break;

@@ -72,35 +69,12 @@
         LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
     }
 
     // post-process the network
-#if 1
     net->CompileNetwork();
-#else
-    net->ValidateNetwork(false /*allowFragment*/, true /*bAllowNoCriterion*/); // no criterion possible because ...TODO: what's the reason?
-#endif
 
     return net;
 }
 
-// special version for a deprecated implementation of sequence-to-sequence models
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
-{
-    ComputationNetworkPtr net;
-    switch (m_rnnType)
-    {
-    case ALIGNMENTSIMILARITYGENERATOR:
-        net = BuildAlignmentDecoderNetworkFromDescription(encoderNet);
-        net->CompileNetwork();
-        return net;
-    case ALIGNMENTSIMILARITYGFORWARDDECODER:
-        net = BuildAlignmentForwardDecoderNetworkFromDescription(encoderNet);
-        net->CompileNetwork();
-        return net;
-    }
-    return BuildNetworkFromDescription();
-}
-
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
 {

@ -530,265 +504,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
|
||||||
return m_net;
|
return m_net;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
this builds an alignment based LM generator
|
|
||||||
the aligment node takes a variable length input and relates each element to a variable length output
|
|
||||||
*/
|
|
||||||
template <class ElemType>
|
|
||||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
|
|
||||||
{
|
|
||||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
|
||||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
|
||||||
{
|
|
||||||
unsigned long randomSeed = 1;
|
|
||||||
|
|
||||||
size_t numHiddenLayers = m_layerSizes.size() - 2;
|
|
||||||
|
|
||||||
size_t numRecurrentLayers = m_recurrentLayers.size();
|
|
||||||
|
|
||||||
ComputationNodePtr input, encoderOutput, e,
|
|
||||||
b, w, u, pastValue, output, label, alignoutput;
|
|
||||||
ComputationNodePtr clslogpostprob;
|
|
||||||
ComputationNodePtr clsweight;
|
|
||||||
ComputationNodePtr columnStride, rowStride;
|
|
||||||
|
|
||||||
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
|
|
||||||
m_net->FeatureNodes().push_back(input);
|
|
||||||
|
|
||||||
if (m_lookupTableOrder > 0)
|
|
||||||
{
|
|
||||||
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
|
|
||||||
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
output = builder.LookupTable(e, input, L"LookupTable");
|
|
||||||
|
|
||||||
if (m_addDropoutNodes)
|
|
||||||
input = builder.Dropout(output);
|
|
||||||
else
|
|
||||||
input = output;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
|
||||||
}
|
|
||||||
|
|
||||||
int recur_idx = 0;
|
|
||||||
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
|
||||||
|
|
||||||
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
|
|
||||||
std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
|
|
||||||
if (encoderPairNodes.size() != 1)
|
|
||||||
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
|
|
||||||
|
|
||||||
encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
|
|
||||||
|
|
||||||
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
|
|
||||||
std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
|
|
||||||
if (encoderEvaluationNodes.size() != 1)
|
|
||||||
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
|
|
||||||
|
|
||||||
if (numHiddenLayers > 0)
|
|
||||||
{
|
|
||||||
int i = 1 + offset;
|
|
||||||
u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
|
|
||||||
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
|
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
|
|
||||||
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
|
|
||||||
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
|
||||||
// output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
|
||||||
|
|
||||||
/// alignment node to get weights from source to target
|
|
||||||
/// this aligment node computes weights of the current hidden state after special encoder ending symbol to all
|
|
||||||
/// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
|
|
||||||
/// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
|
|
||||||
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
|
|
||||||
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
|
|
||||||
columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
|
|
||||||
columnStride->Value().SetValue(1);
|
|
||||||
columnStride->SetParameterUpdateRequired(false);
|
|
||||||
rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
|
|
||||||
rowStride->Value().SetValue(0);
|
|
||||||
rowStride->SetParameterUpdateRequired(false);
|
|
||||||
alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
|
|
||||||
|
|
||||||
// alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
|
|
||||||
|
|
||||||
output = ApplyNonlinearFunction(
|
|
||||||
builder.Plus(
|
|
||||||
builder.Times(u, input), builder.Times(w, alignoutput)),
|
|
||||||
0);
|
|
||||||
pastValue->AttachInputs(output);
|
|
||||||
input = output;
|
|
||||||
|
|
||||||
for (; i < numHiddenLayers; i++)
|
|
||||||
{
|
|
||||||
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
|
|
||||||
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
|
|
||||||
|
|
||||||
if (m_addDropoutNodes)
|
|
||||||
input = builder.Dropout(output);
|
|
||||||
else
|
|
||||||
input = output;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// need to have [input_dim x output_dim] matrix
|
|
||||||
/// e.g., [200 x 10000], where 10000 is the vocabulary size
|
|
||||||
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
|
|
||||||
/// the label is a dense matrix. each element is the word index
|
|
||||||
label = builder.CreateInputNode(L"labels", 4);
|
|
||||||
|
|
||||||
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
|
||||||
m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
|
|
||||||
|
|
||||||
output = builder.Times(builder.Transpose(w), input, L"outputs");
|
|
||||||
|
|
||||||
m_net->PairNodes().push_back(input);
|
|
||||||
|
|
||||||
m_net->OutputNodes().push_back(output);
|
|
||||||
|
|
||||||
//add softmax layer (if prob is needed or KL reg adaptation is needed)
|
|
||||||
output = builder.Softmax(output, L"PosteriorProb");
|
|
||||||
}
|
|
||||||
|
|
||||||
return m_net;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class ElemType>
|
|
||||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
|
|
||||||
{
|
|
||||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
|
||||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
|
||||||
{
|
|
||||||
unsigned long randomSeed = 1;
|
|
||||||
|
|
||||||
size_t numHiddenLayers = m_layerSizes.size() - 2;
|
|
||||||
|
|
||||||
size_t numRecurrentLayers = m_recurrentLayers.size();
|
|
||||||
|
|
||||||
ComputationNodePtr input, encoderOutput, e,
|
|
||||||
b, w, u, pastValue, output, label, alignoutput;
|
|
||||||
ComputationNodePtr clslogpostprob;
|
|
||||||
ComputationNodePtr clsweight;
|
|
||||||
ComputationNodePtr columnStride, rowStride;
|
|
||||||
|
|
||||||
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
|
|
||||||
m_net->FeatureNodes().push_back(input);
|
|
||||||
|
|
||||||
if (m_lookupTableOrder > 0)
|
|
||||||
{
|
|
||||||
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
|
|
||||||
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
|
|
||||||
output = builder.LookupTable(e, input, L"LookupTable");
|
|
||||||
|
|
||||||
if (m_addDropoutNodes)
|
|
||||||
input = builder.Dropout(output);
|
|
||||||
else
|
|
||||||
input = output;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
|
|
||||||
}
|
|
||||||
|
|
-        int recur_idx = 0;
-        int offset = m_lookupTableOrder > 0 ? 1 : 0;
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
-        if (encoderPairNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
-
-        encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
-        if (encoderEvaluationNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
-
-        if (numHiddenLayers > 0)
-        {
-            int i = 1 + offset;
-            u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
-            m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
-            w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-            pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
-            // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-
-            /// alignment node to get weights from source to target
-            /// this aligment node computes weights of the current hidden state after special encoder ending symbol to all
-            /// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
-            /// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-
-            columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
-            columnStride->Value().SetValue(1);
-            columnStride->SetParameterUpdateRequired(false);
-            rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
-            rowStride->Value().SetValue(0);
-            rowStride->SetParameterUpdateRequired(false);
-            alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
-
-            // alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
-
-            output = ApplyNonlinearFunction(
-                builder.Plus(
-                    builder.Times(u, input), builder.Times(w, alignoutput)),
-                0);
-            pastValue->AttachInputs(output);
-            input = output;
-
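The deleted alignment block above is a soft-attention step. As a reading aid, the StrideTimes/Softmax composition corresponds to the following (a sketch inferred from the code, not text from the commit: H is `encoderOutput` with one column per source position, W_s is the `MatForSimilarity` parameter, s_{t-1} is the previous decoder state delivered by `pastValue`, and x_t is the current `input`):

    \alpha_t = \mathrm{softmax}\!\left( H^{\top} W_s \, s_{t-1} \right), \qquad
    c_t = H \alpha_t, \qquad
    s_t = \sigma\!\left( U x_t + W c_t \right)

so c_t (`alignoutput`) is the attention-weighted summary of encoder states that gets mixed into the recurrence through ApplyNonlinearFunction.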
-            for (; i < numHiddenLayers; i++)
-            {
-                //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-                output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-
-                if (m_addDropoutNodes)
-                    input = builder.Dropout(output);
-                else
-                    input = output;
-            }
-        }
-
-        /// need to have [input_dim x output_dim] matrix
-        /// e.g., [200 x 10000], where 10000 is the vocabulary size
-        /// this is for speed-up issue as per word matrix can be simply obtained using column slice
-        w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
-        m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-        /// the label is a dense matrix. each element is the word index
-        label = builder.CreateInputNode(L"labels", 4);
-
-        clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
-        m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
-        clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
-
-        output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
-                                               clslogpostprob);
-
-        output = builder.Times(builder.Transpose(w), input, L"outputs");
-
-        m_net->PairNodes().push_back(input);
-
-        m_net->OutputNodes().push_back(output);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(output, L"PosteriorProb");
-    }
-
-    return m_net;
-}
 
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription()
 {
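The `[input_dim x output_dim]` comment in the deleted output layer is the usual class-based-softmax layout argument: storing the output embedding matrix hidden-dimension-major makes the vector for one word a contiguous column, so a per-word lookup is a column slice rather than a gather. A minimal illustration of that layout (plain C++, not CNTK API):

    #include <cstddef>
    #include <vector>

    // W stored column-major as [hiddenDim x vocabSize]: column w holds word w's output vector.
    struct OutputEmbedding
    {
        std::size_t hiddenDim, vocabSize;
        std::vector<float> data; // hiddenDim * vocabSize entries, column-major

        // O(1) view of one word's vector; no copy, no transpose.
        const float* wordColumn(std::size_t wordId) const
        {
            return data.data() + wordId * hiddenDim;
        }
    };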
@@ -1608,95 +1323,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
     return m_net;
 }
 
-/**
-This is encoder LSTM described in the following papers:
-H. Sutskever, O. Vinyals and Q. V. Le, "Sequence to sequence learning with neural networks", http://arxiv.org/abs/1409.3215
-
-The following code constructs the encoder and, to construct decoder, use BuildLSTMNetworkFromDescription
-
-Developed by Kaisheng Yao
-This is used in the following works:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion, submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        ULONG randomSeed = 1;
-
-        size_t i = 0;
-        size_t numHiddenLayers = m_layerSizes.size() - 1;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-
-        ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior;
-
-        if (m_sparse_input)
-            input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
-        else
-            input = builder.CreateInputNode(L"features", m_layerSizes[0]);
-
-        m_net->FeatureNodes().push_back(input);
-
-        if (m_applyMeanVarNorm)
-        {
-            w = builder.Mean(input);
-            b = builder.InvStdDev(input);
-            output = builder.PerDimMeanVarNormalization(input, w, b);
-
-            input = output;
-        }
-
-        if (m_lookupTableOrder > 0)
-        {
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"EncoderE%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, L"EncoderLookupTable");
-#ifdef DEBUG_DECODER
-            e->Value().SetValue((ElemType) 0.01);
-#endif
-
-            if (m_addDropoutNodes)
-                input = builder.Dropout(output);
-            else
-                input = output;
-            i++;
-        }
-
-        /// direct connect from input node to output node
-
-        int recur_idx = 0;
-        int offset = m_lookupTableOrder > 0 ? 1 : 0;
-        if (numHiddenLayers > 0)
-        {
-            //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            input = output;
-            i++;
-
-            for (; i < numHiddenLayers; i++)
-            {
-                //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-                output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-
-                if (m_addDropoutNodes)
-                    input = builder.Dropout(output);
-                else
-                    input = output;
-            }
-        }
-
-        m_net->OutputNodes().push_back(output);
-        m_net->PairNodes().push_back(output); /// need to provide pairnodes so that the next layer of network can connect to this network
-        m_net->EvaluationNodes().push_back(output);
-    }
-
-    return m_net;
-}
-
 /**
 Build unidirectional LSTM p(y_t | y_t-1, x_1^t)
 
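Read together, the two deleted builders formed the legacy two-network pairing: the encoder advertised its top layer through PairNodes(), and the decoder mirrored that node into its own graph with PairNetwork before attending over it. Schematically (a sketch of the deleted flow reusing the deleted APIs; `encoderTopLayer` is an illustrative name):

    // Encoder side (deleted BuildLSTMEncoderNetworkFromDescription):
    encoderNet->PairNodes().push_back(encoderTopLayer); // advertise the interface node

    // Decoder side (deleted BuildAlignmentDecoderNetworkFromDescription):
    auto encoderOutput = builder.PairNetwork(
        dynamic_pointer_cast<ComputationNode<ElemType>>(encoderNet->PairNodes()[0]),
        L"pairNetwork"); // mirror the encoder node into the decoder graph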
@@ -38,16 +38,13 @@ enum RNNTYPE
     DEEPRNN = 4,
     CLASSLM = 8,
     LBLM = 16,
-    LSTMENCODER = 18,
     NPLM = 32,
     CLASSLSTM = 64,
     NCELSTM = 128,
     CLSTM = 256,
     RCRF = 512,
     UNIDIRECTIONALLSTM = 19,
-    BIDIRECTIONALLSTM = 20,
-    ALIGNMENTSIMILARITYGENERATOR = 21,
-    ALIGNMENTSIMILARITYGFORWARDDECODER = 22
+    BIDIRECTIONALLSTM = 20
 };
 
 enum class TrainingCriterion : int // TODO: camel-case these
@@ -191,18 +188,12 @@ public:
             m_rnnType = CLSTM;
         else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
             m_rnnType = RCRF;
-        else if (std::find(strType.begin(), strType.end(), L"LSTMENCODER") != strType.end())
-            m_rnnType = LSTMENCODER;
         else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
                  std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
             m_rnnType = UNIDIRECTIONALLSTM;
         else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
                  std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
             m_rnnType = BIDIRECTIONALLSTM;
-        else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGENERATOR") != strType.end())
-            m_rnnType = ALIGNMENTSIMILARITYGENERATOR;
-        else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGFORWARDDECODER") != strType.end())
-            m_rnnType = ALIGNMENTSIMILARITYGFORWARDDECODER;
         else
             InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
     }
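For orientation, `strType` holds the `rnnType` value from the builder configuration, so the removed branches mean those names are now rejected up front. A hypothetical config fragment (illustrative syntax, not from the commit):

    SimpleNetworkBuilder = [
        # "LSTMENCODER" and the ALIGNMENTSIMILARITY* names now fall through to InvalidArgument
        rnnType = "CLASSLSTM"
    ]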
@@ -255,7 +246,6 @@ public:
     }
 
     ComputationNetworkPtr BuildNetworkFromDescription();
-    ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* encoderNet); // legacy support of deprecated sequence-to-sequence implementation
 
     ComputationNetworkPtr BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName); // legacy support for fseide's Microsoft-internal tool "DBN.exe"
 

@@ -287,8 +277,6 @@ protected:
 
     ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
 
-    ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription();
-
     ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
 
     ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
 

@@ -299,10 +287,6 @@ protected:
 
     ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();
 
-    ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
-
-    ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
-
     //layer is 0 based
     ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
     ComputationNodePtr AddTrainAndEvalCriterionNodes(ComputationNodePtr input, ComputationNodePtr label, ComputationNodePtr matrix = nullptr, const std::wstring trainNodeName = L"", const std::wstring evalNodeName = L"", ComputationNodePtr clspostprob = nullptr, ComputationNodePtr trans = nullptr);
@@ -183,7 +183,6 @@ static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr>& nes
 // This sets index, lowLink, m_visited, and m_inStack.
 void ComputationNetwork::DetermineSCCs(const ComputationNodeBasePtr& rootNode)
 {
-    // notice that this graph including graphs from a parent networks if two or more networks are connected via PairNetworkNode
     list<ComputationNodeBasePtr> sccStack;
     size_t index = 0;
     size_t loopId = 0; // BUGBUG: I think this is currently buggy in an edge case, and not needed (use m_allSEQNodes.size() instead).
@@ -100,8 +100,6 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
         return New<NegateNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(NoiseContrastiveEstimationNode))
         return New<NoiseContrastiveEstimationNode<ElemType>>(forward<_Types>(_Args)...);
-    else if (nodeType == OperationNameOf(PairNetworkNode))
-        return New<PairNetworkNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ParallelNode))
         return New<ParallelNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(PastValueNode))
@@ -293,12 +291,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
     return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
 }
 
-template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols)
-{
-    return net.AddNodeToNetWithElemType(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), inputName, rows, cols));
-}
-
 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName,
                                                                                                  const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
|
||||||
// The following functions create nodes and link them to the network and their inputs.
|
// The following functions create nodes and link them to the network and their inputs.
|
||||||
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
|
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
|
||||||
|
|
||||||
template <class ElemType>
|
|
||||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName)
|
|
||||||
{
|
|
||||||
if (net.GetNodeFromName(a->NodeName(), nullptr, false) != nullptr)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "PairNetwork: asked to pair a node with name %ls in another network. However, this network has already a node with the same name. Should avoid this case.\n", a->NodeName().c_str());
|
|
||||||
RuntimeError("PairNetwork: asked to pair a node with name in another network. However, this network has already a node with the same name. Should avoid this case.\n");
|
|
||||||
}
|
|
||||||
return net.AddNodeToNetAndAttachInputs(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), nodeName), a);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convolution(const ComputationNodePtr weight,
|
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convolution(const ComputationNodePtr weight,
|
||||||
const ComputationNodePtr inputValues,
|
const ComputationNodePtr inputValues,
|
||||||
|
|
|
@@ -51,7 +51,6 @@ public:
     ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows);
     ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
     ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
-    ComputationNodePtr CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols);
     ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
     ComputationNodePtr CreateMaxPoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
     ComputationNodePtr CreateAveragePoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
 

@@ -60,7 +59,6 @@ public:
     ComputationNodePtr CreateComputationNode(const std::wstring& nodeType, const std::wstring& nodeName);
     // The following functions create nodes and link them to the network and their inputs.
     // TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
-    ComputationNodePtr PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName = L"");
     ComputationNodePtr Convolution(const ComputationNodePtr weight,
                                    const ComputationNodePtr inputValues,
                                    const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
@@ -1662,98 +1662,6 @@ public:
 template class StrideTimesNode<float>;
 template class StrideTimesNode<double>;
 
-// -----------------------------------------------------------------------
-// PairNetworkNode (input)
-// -----------------------------------------------------------------------
-
-/**
-pair this node to a node in another network
-this node provide an interface from this network. The next layer network then can use this interface to know which node to connect to.
-*/
-template <class ElemType>
-class PairNetworkNode : public ComputationNode<ElemType>, public NumInputs<1>
-{
-    typedef ComputationNode<ElemType> Base;
-    UsingComputationNodeMembersBoilerplate;
-    static const std::wstring TypeName()
-    {
-        return L"PairNetwork";
-    }
-
-    void Init(size_t row_size, size_t /*col_size*/)
-    {
-        CreateMatrixIfNull(m_value);
-        SetDims(TensorShape(row_size), HasMBLayout());
-        UpdateFunctionValuesSize();
-    }
-
-public:
-    DeclareConstructorFromConfigWithNumInputs(PairNetworkNode);
-    PairNetworkNode(DEVICEID_TYPE deviceId, const wstring& name, size_t row_size = 1, size_t col_size = 1)
-        : Base(deviceId, name)
-    {
-        Init(row_size, col_size);
-        CreateMatrixIfNull(m_gradient);
-        m_gradient->Resize(row_size, col_size);
-        m_gradient->SetValue(0.0f);
-    }
-
-    virtual void Load(File& fstream, size_t modelVersion) override
-    {
-        Init(1, 1); // TODO: this looks wrong; should the dimension not come from the loaded model data?
-        Base::Load(fstream, modelVersion);
-    }
-
-    /// to-do: need to change to the new way of resetting state
-    void BackpropToMap(const size_t inputIndex)
-    {
-        if (inputIndex > 0)
-            InvalidArgument("PairNetwork operation only takes one input.");
-
-        Matrix<ElemType>::ScaleAndAdd(1.0, Gradient(), Input(inputIndex)->Gradient());
-    }
-
-    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
-    {
-        if (fr.IsAllFrames())
-        {
-            BackpropToMap(inputIndex);
-            return;
-        } // TODO: remove these one by one
-        assert(GetSampleMatrixNumRows() == Gradient().GetNumRows()); // original used m_value->GetNumRows() for loop dimension
-        assert(m_pMBLayout);
-
-        Matrix<ElemType> mTmp = Input(inputIndex)->GradientFor(fr);
-        Matrix<ElemType>::ScaleAndAdd(1.0, GradientFor(fr), mTmp);
-    }
-
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        return false;
-    }
-    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
-    {
-        return false;
-    }
-
-    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
-    {
-        Matrix<ElemType> mTmp = ValueFor(fr);
-        mTmp.SetValue(Input(0)->ValueFor(fr));
-    }
-
-    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
-    {
-        Base::Validate(isFinalValidationPass);
-        InferMBLayoutFromInputsForStandardCase();
-
-        SetDims(Input(0));
-    }
-};
-
-template class PairNetworkNode<float>;
-template class PairNetworkNode<double>;
-
 // -----------------------------------------------------------------------
 // ParallelNode (input0, input1)
 // TODO: How is this different from RowStack?
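As the deleted ForwardProp/BackpropTo show, PairNetworkNode was a pure pass-through: the forward pass copied the paired input's value and the backward pass accumulated the gradient back into it. In symbols (a summary, not from the commit):

    v = x, \qquad \frac{\partial L}{\partial x} \mathrel{+}= \frac{\partial L}{\partial v}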
@@ -2,12 +2,12 @@
 // Copyright (c) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
 //
-//helpful macros
+// helpful macros
 // TODO: the file's name is too general to be included from outside; MathHelpers.h?
 
-//iterators
-//
-#pragma once
+//#pragma once
+
+// iterators
 #undef foreach_row
 #undef foreach_column
 #undef foreach_coord

@@ -19,5 +19,5 @@
 for (long _i = 0; _i < (_m).GetNumRows(); _i++)
 #define foreach_row_in_submat(_i, _istart, _iend, _m) for (long _i = _istart; _i < min(_iend, (_m).GetNumRows()); _i++)
 
-//this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
+// this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
 #define column_s_ind_colwisem(_colNum, _numRows) ((_numRows) * (_colNum))
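As a quick check of `column_s_ind_colwisem` above: with 5 rows, column 3 of a column-major array begins at flat index 15, since the macro expands to `((5) * (3))`. For instance (an illustrative compile-time check, not from the commit):

    static_assert(column_s_ind_colwisem(3, 5) == 15, "column 3 of a 5-row matrix starts at element 15");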
@@ -10,7 +10,6 @@
 #include "CompositeComputationNodes.h" // for PrecomputeNode
 #include "SimpleEvaluator.h"
 #include "DataReader.h"
-#include "IComputationNetBuilder.h"
 #include "ScriptableObjects.h"
 #include <vector>
 #include <string>
@@ -8,6 +8,7 @@
 #include "DataReader.h"
 #include "ComputationNetwork.h"
 #include "DataReaderHelpers.h"
+#include "Helpers.h"
 #include "fileutil.h"
 #include <vector>
 #include <string>