removed two more Multinetworks configs from SimpleNetworkBuilder
Parent: 482a6a4307
Commit: 6d31cda88a
Makefile

@@ -462,7 +462,7 @@ CNTK_SRC =\
 	$(SOURCEDIR)/ActionsLib/TrainActions.cpp \
 	$(SOURCEDIR)/ActionsLib/EvalActions.cpp \
 	$(SOURCEDIR)/ActionsLib/OtherActions.cpp \
 	$(SOURCEDIR)/ActionsLib/EsotericActions.cpp \
 	$(SOURCEDIR)/ActionsLib/SpecialPurposeActions.cpp \
 	$(SOURCEDIR)/SequenceTrainingLib/latticeforwardbackward.cpp \
 	$(SOURCEDIR)/SequenceTrainingLib/parallelforwardbackward.cpp \
 	$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
SimpleNetworkBuilder.cpp

@@ -59,12 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription()
     case RCRF:
         net = BuildSeqTrnLSTMNetworkFromDescription();
         break;
-    case UNIDIRECTIONALLSTM:
-        net = BuildUnidirectionalLSTMNetworksFromDescription();
-        break;
-    case BIDIRECTIONALLSTM:
-        net = BuildBiDirectionalLSTMNetworksFromDescription();
-        break;
     default:
         LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
     }
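With these six lines gone, a model description that still selects one of the two removed RNN types falls through to the default branch and stops with LogicError. A standalone sketch of that fail-fast dispatch (all names are stand-ins mirroring the hunk, not the actual CNTK declarations; UNIDIRECTIONALLSTM's old value 19 is taken from the header hunk further down):

#include <cstdio>
#include <cstdlib>

enum RNNTYPE { RCRF = 512, UNIDIRECTIONALLSTM = 19 };

[[noreturn]] static void LogicError(const char* fmt, int v)
{
    std::fprintf(stderr, fmt, v);
    std::exit(1);
}

static const char* BuildNetworkFromDescription(RNNTYPE rnnType)
{
    switch (rnnType)
    {
    case RCRF:
        return "BuildSeqTrnLSTMNetworkFromDescription";
    // the UNIDIRECTIONALLSTM / BIDIRECTIONALLSTM cases are gone,
    default: // so those enum values now fail fast here
        LogicError("BuildNetworkFromDescription: invalid m_rnnType %d\n", (int) rnnType);
    }
}

int main()
{
    std::printf("%s\n", BuildNetworkFromDescription(RCRF));
    BuildNetworkFromDescription(UNIDIRECTIONALLSTM); // terminates with the error message
}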
@@ -1323,130 +1317,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescription()
     return m_net;
 }
 
-/**
-Build unidirectional LSTM p(y_t | y_t-1, x_1^t)
-
-Because the past prediction is used, decoding requires a beam search decoder
-
-Developed by Kaisheng Yao
-This is used in the following work
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNetworksFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        ULONG randomSeed = 1;
-
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-        size_t dims = 0;
-
-        ComputationNodePtr input, w, b, u, e, Wxo, output, label, prior;
-        vector<ComputationNodePtr> streams;
-        vector<size_t> streamdims;
-        ComputationNodePtr inputforward, inputbackward, inputletter;
-        ComputationNodePtr transcription_prediction;
-
-        map<wstring, size_t> featDim;
-
-        assert(m_streamSizes.size() > 0);
-        inputbackward = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]);
-        m_net->FeatureNodes().push_back(inputbackward);
-        featDim[L"featurepastValueedTarget"] = m_streamSizes[0];
-
-        inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]);
-        m_net->FeatureNodes().push_back(inputletter);
-        featDim[L"ltrForward"] = m_streamSizes[1];
-
-        size_t layerIdx = 0;
-        size_t idx = 0;
-        int recur_idx = 0;
-        for (auto p = m_net->FeatureNodes().begin(); p != m_net->FeatureNodes().end(); p++, idx++)
-        {
-            layerIdx = 0; /// reset layer id because each input stream starts from layer 0
-            input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-            if (m_applyMeanVarNorm)
-            {
-                input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-                w = builder.Mean(input);
-                b = builder.InvStdDev(input);
-                output = builder.PerDimMeanVarNormalization(input, w, b);
-
-                input = output;
-            }
-
-            size_t idim = input->GetSampleMatrixNumRows();
-            assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());
-
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, msra::strfun::wstrprintf(L"LOOKUP%d", idx));
-
-            streamdims.push_back(m_layerSizes[1] * m_lookupTabelOrderSizes[idx]);
-            input = output;
-            streams.push_back(input);
-        }
-
-        layerIdx++;
-
-        output = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel0");
-        input = output;
-        dims = streamdims[0] + streamdims[1];
-
-        /// now merge the streams
-        if (numHiddenLayers > 0)
-        {
-            while (layerIdx < numHiddenLayers)
-            {
-                switch (m_rnnType)
-                {
-                case UNIDIRECTIONALLSTM:
-                    //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input);
-                    output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input);
-                    break;
-                default:
-                    LogicError("This is for unidirectional LSTM model. Check rnntype to see whether it is UNIDIRECTIONALLSTMWITHPASTPREDICTION or TRANSDUCER");
-                }
-
-                layerIdx++;
-                dims = m_layerSizes[layerIdx];
-                input = output;
-            }
-        }
-
-        /// directly connect transcription model output/feature to the output layer
-        Wxo = builder.CreateLearnableParameter(L"ConnectToLowerLayers", m_layerSizes[numHiddenLayers + 1], m_layerSizes[layerIdx]);
-        m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
-
-        output = builder.Times(Wxo, input);
-        input = output;
-
-        /// here uses "labels", so only one label from multiple stream inputs is used.
-        label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]);
-
-        AddTrainAndEvalCriterionNodes(input, label, w);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(input, L"outputs");
-
-        if (m_needPrior)
-        {
-            prior = builder.Mean(label);
-            input = builder.Log(prior, L"LogOfPrior");
-            ComputationNodePtr scaledLogLikelihood = builder.Minus(output, input, L"ScaledLogLikelihood");
-            m_net->OutputNodes().push_back(scaledLogLikelihood);
-        }
-        else
-            m_net->OutputNodes().push_back(output);
-    }
-
-    return m_net;
-}
-
 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilder<ElemType>::BuildLSTMComponentWithMultiInputs(ULONG& randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse)
 {
@@ -1637,163 +1507,6 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilder<ElemType>::BuildLSTMComponentWithMultiInputs(ULONG& randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse)
     return output;
 }
 
-/**
-Build a bi-directional LSTM network to compute the following
-p(y_t | y_1^{t-1}, x_1^T)
-The target side for y_t is an LSTM language model with past prediction y_{t-1} as its input. This language model also uses
-the outputs from the forward direction LSTM and the output from the backward direction LSTM that are operated on the source side.
-
-Developed by Kaisheng Yao.
-This is used in the following works:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetworksFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        ULONG randomSeed = 1;
-
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-
-        ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior, Wxo;
-        ComputationNodePtr forwardInput, forwardOutput, backwardInput, backwardOutput;
-        vector<ComputationNodePtr> streams;
-        vector<size_t> streamdims;
-        ComputationNodePtr inputprediction, inputletter, ngram;
-        ComputationNodePtr ltrSource;
-        size_t ltrDim = 0;
-
-        map<wstring, size_t> featDim;
-
-        size_t ltrSrcIdx = 1;
-        /// create projections to use pastValue predictions
-        inputprediction = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]);
-        m_net->FeatureNodes().push_back(inputprediction);
-
-        inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]);
-        m_net->FeatureNodes().push_back(inputletter);
-        featDim[L"ltrForward"] = m_streamSizes[1];
-
-        size_t layerIdx = 0;
-        size_t idx = 0;
-        int recur_idx = 0;
-        for (auto p = m_net->FeatureNodes().begin(); p != m_net->FeatureNodes().end(); p++, idx++)
-        {
-            layerIdx = 0; /// reset layer id because each input stream starts from layer 0
-            input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-            if (m_applyMeanVarNorm)
-            {
-                input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-                w = builder.Mean(input);
-                b = builder.InvStdDev(input);
-                output = builder.PerDimMeanVarNormalization(input, w, b);
-
-                input = output;
-            }
-
-            size_t idim = input->GetSampleMatrixNumRows();
-            assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());
-
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, msra::strfun::wstrprintf(L"LOOKUP%d", idx));
-
-            streamdims.push_back(m_layerSizes[1] * m_lookupTabelOrderSizes[idx]);
-            input = output;
-            streams.push_back(input);
-
-            if (idx == ltrSrcIdx)
-            {
-                ltrSource = input;
-                ltrDim = m_layerSizes[1] * m_lookupTabelOrderSizes[idx];
-            }
-        }
-
-        layerIdx++;
-
-        /// glue the two streams
-        forwardInput = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel0");
-
-        if (numHiddenLayers > 0)
-        {
-            /// forward direction
-            //forwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-            forwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-            forwardInput = forwardOutput;
-
-            backwardInput = (ComputationNodePtr) builder.TimeReverse(ltrSource);
-            //backwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput);
-            backwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput);
-            backwardInput = backwardOutput;
-
-            layerIdx++;
-
-            while (layerIdx < numHiddenLayers - 1)
-            {
-                //forwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput);
-                forwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput);
-                forwardInput = forwardOutput;
-
-                //backwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput);
-                backwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput);
-                backwardInput = backwardOutput;
-
-                layerIdx++;
-            }
-
-            backwardOutput = (ComputationNodePtr) builder.TimeReverse(backwardInput);
-        }
-
-        streams.clear();
-        streamdims.clear();
-        streams.push_back(forwardOutput);
-        streamdims.push_back(m_layerSizes[layerIdx]);
-        streams.push_back(backwardOutput);
-        streamdims.push_back(m_layerSizes[layerIdx]);
-
-        /// glue the two streams
-        forwardInput = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel1");
-
-        // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-        output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-
-        input = output;
-        layerIdx++;
-
-        /// directly connect transcription model output/feature to the output layer
-        Wxo = builder.CreateLearnableParameter(L"ConnectToLowerLayers", m_layerSizes[numHiddenLayers + 1], m_layerSizes[layerIdx]);
-        m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
-
-        output = builder.Times(Wxo, input);
-        input = output;
-
-        /// here uses "labels", so only one label from multiple stream inputs is used.
-        label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]);
-
-        AddTrainAndEvalCriterionNodes(input, label);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(input, L"outputs");
-
-        if (m_needPrior)
-        {
-            prior = builder.Mean(label);
-            input = builder.Log(prior, L"LogOfPrior");
-            ComputationNodePtr scaledLogLikelihood = builder.Minus(output, input, L"ScaledLogLikelihood");
-            m_net->OutputNodes().push_back(scaledLogLikelihood);
-        }
-        else
-            m_net->OutputNodes().push_back(output);
-    }
-
-    return m_net;
-}
-
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription()
 {
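Likewise, the deleted bi-directional builder modeled (again a restatement of its doc comment)

    p(y_1^T \mid x_1^T) = \prod_{t=1}^{T} p(y_t \mid y_1^{t-1}, x_1^T)

with the source side x_1^T encoded in both directions, the backward pass realized in the code above as TimeReverse, then BuildLSTMComponent, then a second TimeReverse, and the forward and backward outputs concatenated via Parallel before the target-side LSTM language model consumed them.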
SimpleNetworkBuilder.h

@@ -42,9 +42,7 @@ enum RNNTYPE
     CLASSLSTM = 64,
     NCELSTM = 128,
     CLSTM = 256,
-    RCRF = 512,
-    UNIDIRECTIONALLSTM = 19,
-    BIDIRECTIONALLSTM = 20
+    RCRF = 512
 };
 
 enum class TrainingCriterion : int // TODO: camel-case these
@@ -188,12 +186,6 @@ public:
             m_rnnType = CLSTM;
         else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
             m_rnnType = RCRF;
-        else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
-                 std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
-            m_rnnType = UNIDIRECTIONALLSTM;
-        else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
-                 std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
-            m_rnnType = BIDIRECTIONALLSTM;
         else
             InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
     }
@@ -277,10 +269,6 @@ protected:
 
     ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
 
-    ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
-
-    ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
-
     ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription();
 
     ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription();
BestGpu.h

@@ -9,12 +9,7 @@
 #include "CommonMatrix.h"
 
 // define IConfigRecord and ConfigParameters as incomplete types, in order to avoid having to include "ScriptableObjects.h" and "Config.h", as that confuses some .CU code
-namespace Microsoft { namespace MSR { namespace ScriptableObjects {
-
-struct IConfigRecord;
-}
-}
-}
+namespace Microsoft { namespace MSR { namespace ScriptableObjects { struct IConfigRecord; }}}
 
 namespace Microsoft { namespace MSR { namespace CNTK {
 
@@ -30,4 +25,5 @@ static inline DEVICEID_TYPE DeviceFromConfig(const ConfigRecordType& /*config*/)
 } // tells runtime system to not try to use GPUs
 // TODO: find a way to use CPUDEVICE without a huge include overhead; OK so far since CPUONLY mode is sorta special...
 #endif
 
+} } }
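For context, the CPUONLY variant of DeviceFromConfig ignores its argument and pins everything to the CPU. A self-contained sketch of that build-time selection pattern; DEVICEID_TYPE and CPUDEVICE actually come from CommonMatrix.h, and the -1 value below is an assumption:

// Stand-ins for the definitions normally provided by CommonMatrix.h.
typedef int DEVICEID_TYPE;
const DEVICEID_TYPE CPUDEVICE = -1; // assumed value, for illustration only

#define CPUONLY 1

template <class ConfigRecordType>
static inline DEVICEID_TYPE DeviceFromConfig(const ConfigRecordType& /*config*/)
{
#ifdef CPUONLY
    return CPUDEVICE; // tells the runtime system not to try to use GPUs
#else
    return 0;         // non-CPUONLY builds would consult the config here
#endif
}

int main()
{
    int dummyConfig = 0;
    return DeviceFromConfig(dummyConfig) == CPUDEVICE ? 0 : 1;
}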