removed two more Multinetworks configs from SimpleNetworkBuilder

Frank Seide 2016-01-22 09:42:57 -08:00
Parent 482a6a4307
Commit 6d31cda88a
4 changed files with 4 additions and 307 deletions

View file

@@ -462,7 +462,7 @@ CNTK_SRC =\
 $(SOURCEDIR)/ActionsLib/TrainActions.cpp \
 $(SOURCEDIR)/ActionsLib/EvalActions.cpp \
 $(SOURCEDIR)/ActionsLib/OtherActions.cpp \
-$(SOURCEDIR)/ActionsLib/EsotericActions.cpp \
+$(SOURCEDIR)/ActionsLib/SpecialPurposeActions.cpp \
 $(SOURCEDIR)/SequenceTrainingLib/latticeforwardbackward.cpp \
 $(SOURCEDIR)/SequenceTrainingLib/parallelforwardbackward.cpp \
 $(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \

View file

@@ -59,12 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescriptio
     case RCRF:
         net = BuildSeqTrnLSTMNetworkFromDescription();
         break;
-    case UNIDIRECTIONALLSTM:
-        net = BuildUnidirectionalLSTMNetworksFromDescription();
-        break;
-    case BIDIRECTIONALLSTM:
-        net = BuildBiDirectionalLSTMNetworksFromDescription();
-        break;
     default:
         LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
     }
@@ -1323,130 +1317,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
     return m_net;
 }
-/**
-Build a unidirectional LSTM p(y_t | y_{t-1}, x_1^t).
-Because the past prediction is used, decoding requires a beam-search decoder.
-Developed by Kaisheng Yao.
-This is used in the following work:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNetworksFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) // not built yet
-    {
-        ULONG randomSeed = 1;
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-        size_t dims = 0;
-        ComputationNodePtr input, w, b, u, e, Wxo, output, label, prior;
-        vector<ComputationNodePtr> streams;
-        vector<size_t> streamdims;
-        ComputationNodePtr inputforward, inputbackward, inputletter;
-        ComputationNodePtr transcription_prediction;
-        map<wstring, size_t> featDim;
-        assert(m_streamSizes.size() > 0);
-        inputbackward = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]);
-        m_net->FeatureNodes().push_back(inputbackward);
-        featDim[L"featurepastValueedTarget"] = m_streamSizes[0];
-        inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]);
-        m_net->FeatureNodes().push_back(inputletter);
-        featDim[L"ltrForward"] = m_streamSizes[1];
-        size_t layerIdx = 0;
-        size_t idx = 0;
-        int recur_idx = 0;
-        for (auto p = m_net->FeatureNodes().begin(); p != m_net->FeatureNodes().end(); p++, idx++)
-        {
-            layerIdx = 0; /// reset layer id because each input stream starts from layer 0
-            input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-            if (m_applyMeanVarNorm)
-            {
-                input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-                w = builder.Mean(input);
-                b = builder.InvStdDev(input);
-                output = builder.PerDimMeanVarNormalization(input, w, b);
-                input = output;
-            }
-            size_t idim = input->GetSampleMatrixNumRows();
-            assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, msra::strfun::wstrprintf(L"LOOKUP%d", idx));
-            streamdims.push_back(m_layerSizes[1] * m_lookupTabelOrderSizes[idx]);
-            input = output;
-            streams.push_back(input);
-        }
-        layerIdx++;
-        /// now merge the streams
-        output = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel0");
-        input = output;
-        dims = streamdims[0] + streamdims[1];
-        if (numHiddenLayers > 0)
-        {
-            while (layerIdx < numHiddenLayers)
-            {
-                switch (m_rnnType)
-                {
-                case UNIDIRECTIONALLSTM:
-                    //output = (ComputationNodePtr) BuildLSTMNodeComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input);
-                    output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input);
-                    break;
-                default:
-                    LogicError("This is for the unidirectional LSTM model. Check rnnType to see whether it is UNIDIRECTIONALLSTMWITHPASTPREDICTION or TRANSDUCER");
-                }
-                layerIdx++;
-                dims = m_layerSizes[layerIdx];
-                input = output;
-            }
-        }
-        /// directly connect the transcription model output/feature to the output layer
-        Wxo = builder.CreateLearnableParameter(L"ConnectToLowerLayers", m_layerSizes[numHiddenLayers + 1], m_layerSizes[layerIdx]);
-        m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
-        output = builder.Times(Wxo, input);
-        input = output;
-        /// this uses "labels", so only one label from the multiple input streams is used
-        label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]);
-        AddTrainAndEvalCriterionNodes(input, label, w);
-        // add softmax layer (if prob is needed or KL-reg adaptation is needed)
-        output = builder.Softmax(input, L"outputs");
-        if (m_needPrior)
-        {
-            prior = builder.Mean(label);
-            input = builder.Log(prior, L"LogOfPrior");
-            ComputationNodePtr scaledLogLikelihood = builder.Minus(output, input, L"ScaledLogLikelihood");
-            m_net->OutputNodes().push_back(scaledLogLikelihood);
-        }
-        else
-            m_net->OutputNodes().push_back(output);
-    }
-    return m_net;
-}
 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilder<ElemType>::BuildLSTMComponentWithMultiInputs(ULONG& randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse)
 {
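For reference, the removed builder implemented the factorization sketched in its comment: the output-sequence probability depends explicitly on the model's previous prediction. Restated as one equation, using only the notation from the removed comment:

    P(y_1^T | x_1^T) = \prod_{t=1}^{T} p(y_t | y_{t-1}, x_1^t)

Because y_{t-1} at decoding time is the model's own previous output rather than a ground-truth label, a greedy argmax can commit to an early mistake that later time steps cannot undo; that is why the comment calls for a beam-search decoder.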
@@ -1637,163 +1507,6 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilde
     return output;
 }
-/**
-Build a bi-directional LSTM network to compute the following:
-p(y_t | y_1^{t-1}, x_1^T)
-The target side for y_t is an LSTM language model with the past prediction y_{t-1} as its input. This language model also uses
-the outputs from the forward-direction LSTM and the outputs from the backward-direction LSTM that operate on the source side.
-Developed by Kaisheng Yao.
-This is used in the following work:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetworksFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) // not built yet
-    {
-        ULONG randomSeed = 1;
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-        ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior, Wxo;
-        ComputationNodePtr forwardInput, forwardOutput, backwardInput, backwardOutput;
-        vector<ComputationNodePtr> streams;
-        vector<size_t> streamdims;
-        ComputationNodePtr inputprediction, inputletter, ngram;
-        ComputationNodePtr ltrSource;
-        size_t ltrDim = 0;
-        map<wstring, size_t> featDim;
-        size_t ltrSrcIdx = 1;
-        /// create projections to use pastValue predictions
-        inputprediction = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]);
-        m_net->FeatureNodes().push_back(inputprediction);
-        inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]);
-        m_net->FeatureNodes().push_back(inputletter);
-        featDim[L"ltrForward"] = m_streamSizes[1];
-        size_t layerIdx = 0;
-        size_t idx = 0;
-        int recur_idx = 0;
-        for (auto p = m_net->FeatureNodes().begin(); p != m_net->FeatureNodes().end(); p++, idx++)
-        {
-            layerIdx = 0; /// reset layer id because each input stream starts from layer 0
-            input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-            if (m_applyMeanVarNorm)
-            {
-                input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-                w = builder.Mean(input);
-                b = builder.InvStdDev(input);
-                output = builder.PerDimMeanVarNormalization(input, w, b);
-                input = output;
-            }
-            size_t idim = input->GetSampleMatrixNumRows();
-            assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, msra::strfun::wstrprintf(L"LOOKUP%d", idx));
-            streamdims.push_back(m_layerSizes[1] * m_lookupTabelOrderSizes[idx]);
-            input = output;
-            streams.push_back(input);
-            if (idx == ltrSrcIdx)
-            {
-                ltrSource = input;
-                ltrDim = m_layerSizes[1] * m_lookupTabelOrderSizes[idx];
-            }
-        }
-        layerIdx++;
-        /// glue the two streams
-        forwardInput = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel0");
-        if (numHiddenLayers > 0)
-        {
-            /// forward direction
-            //forwardOutput = (ComputationNodePtr) BuildLSTMNodeComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-            forwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-            forwardInput = forwardOutput;
-            backwardInput = (ComputationNodePtr) builder.TimeReverse(ltrSource);
-            //backwardOutput = (ComputationNodePtr) BuildLSTMNodeComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput);
-            backwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput);
-            backwardInput = backwardOutput;
-            layerIdx++;
-            while (layerIdx < numHiddenLayers - 1)
-            {
-                //forwardOutput = (ComputationNodePtr) BuildLSTMNodeComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput);
-                forwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput);
-                forwardInput = forwardOutput;
-                //backwardOutput = (ComputationNodePtr) BuildLSTMNodeComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput);
-                backwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput);
-                backwardInput = backwardOutput;
-                layerIdx++;
-            }
-            backwardOutput = (ComputationNodePtr) builder.TimeReverse(backwardInput);
-        }
-        streams.clear();
-        streamdims.clear();
-        streams.push_back(forwardOutput);
-        streamdims.push_back(m_layerSizes[layerIdx]);
-        streams.push_back(backwardOutput);
-        streamdims.push_back(m_layerSizes[layerIdx]);
-        /// glue the two streams
-        forwardInput = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel1");
-        //output = (ComputationNodePtr) BuildLSTMNodeComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-        output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-        input = output;
-        layerIdx++;
-        /// directly connect the transcription model output/feature to the output layer
-        Wxo = builder.CreateLearnableParameter(L"ConnectToLowerLayers", m_layerSizes[numHiddenLayers + 1], m_layerSizes[layerIdx]);
-        m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
-        output = builder.Times(Wxo, input);
-        input = output;
-        /// this uses "labels", so only one label from the multiple input streams is used
-        label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]);
-        AddTrainAndEvalCriterionNodes(input, label);
-        // add softmax layer (if prob is needed or KL-reg adaptation is needed)
-        output = builder.Softmax(input, L"outputs");
-        if (m_needPrior)
-        {
-            prior = builder.Mean(label);
-            input = builder.Log(prior, L"LogOfPrior");
-            ComputationNodePtr scaledLogLikelihood = builder.Minus(output, input, L"ScaledLogLikelihood");
-            m_net->OutputNodes().push_back(scaledLogLikelihood);
-        }
-        else
-            m_net->OutputNodes().push_back(output);
-    }
-    return m_net;
-}
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription()
 {
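Again for reference, the bi-directional variant removed above conditions each output on the entire source sequence rather than only on its prefix. Restating its comment's notation as one equation (a restatement, not new material):

    P(y_1^T | x_1^T) = \prod_{t=1}^{T} p(y_t | y_1^{t-1}, x_1^T)

In the removed code, x_1^T enters through the Parallel() concatenation of a forward LSTM over the source and a TimeReverse()'d backward LSTM, while y_1^{t-1} enters through the target-side LSTM that is fed the past prediction.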

View file

@@ -42,9 +42,7 @@ enum RNNTYPE
     CLASSLSTM = 64,
     NCELSTM = 128,
     CLSTM = 256,
-    RCRF = 512,
-    UNIDIRECTIONALLSTM = 19,
-    BIDIRECTIONALLSTM = 20
+    RCRF = 512
 };
 
 enum class TrainingCriterion : int // TODO: camel-case these
@@ -188,12 +186,6 @@ public:
             m_rnnType = CLSTM;
         else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
             m_rnnType = RCRF;
-        else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
-                 std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
-            m_rnnType = UNIDIRECTIONALLSTM;
-        else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
-                 std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
-            m_rnnType = BIDIRECTIONALLSTM;
         else
             InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
     }
@@ -277,10 +269,6 @@ protected:
     ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
-    ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
-    ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
     ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription();
     ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription();

View file

@@ -9,12 +9,7 @@
 #include "CommonMatrix.h"
 
 // define IConfigRecord and ConfigParameters as incomplete types, in order to avoid having to include "ScriptableObjects.h" and "Config.h", as that confuses some .CU code
-namespace Microsoft { namespace MSR { namespace ScriptableObjects {
-struct IConfigRecord;
-}
-}
-}
+namespace Microsoft { namespace MSR { namespace ScriptableObjects { struct IConfigRecord; }}}
 
 namespace Microsoft { namespace MSR { namespace CNTK {
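The replacement line above keeps the same forward declaration, just collapsed onto one line. The underlying technique is ordinary C++: declaring IConfigRecord as an incomplete type lets this header mention it by reference or pointer without including "ScriptableObjects.h". A minimal self-contained sketch of the pattern (the function name below is illustrative, not from this file):

// Incomplete (forward) declaration: no definition of IConfigRecord is needed
// as long as the type is only used by reference or pointer.
namespace Microsoft { namespace MSR { namespace ScriptableObjects { struct IConfigRecord; }}}

namespace Microsoft { namespace MSR { namespace CNTK {

// Compiles against the incomplete type; callers that actually dereference
// the record must include the full header themselves.
void ConfigureFromRecord(const ScriptableObjects::IConfigRecord& config);

}}}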
@@ -30,4 +25,5 @@ static inline DEVICEID_TYPE DeviceFromConfig(const ConfigRecordType& /*config*/)
 } // tells runtime system to not try to use GPUs
 // TODO: find a way to use CPUDEVICE without a huge include overhead; OK so far since CPUONLY mode is sorta special...
 #endif
+} } }
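For context, the branch shown here is the CPUONLY stub of DeviceFromConfig: it ignores its argument and always reports the CPU device. A hedged sketch of what such a stub looks like, assuming the conventional CNTK value CPUDEVICE == -1 (hard-coded here precisely because of the include-overhead TODO above):

// Hypothetical CPUONLY stub: always selects the CPU device.
template <class ConfigRecordType>
static inline DEVICEID_TYPE DeviceFromConfig(const ConfigRecordType& /*config*/)
{
    return -1; // == CPUDEVICE; tells the runtime not to try to use GPUs
}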