normalized comment separator from /// to //
Parent: 03a4fcb295
Commit: 27641d86d0
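This commit is a mechanical comment-style cleanup. In C++, /// introduces a Doxygen-style documentation comment, while // is an ordinary line comment; the comments touched below are inline explanatory notes rather than API documentation, so they are rewritten with the plain separator. A representative before/after pair, quoting the first changed line of the diff:

    /// get line    // old form: picked up by Doxygen-style documentation parsers
    // get line     // new form: a plain line comment

File names are not shown in this rendering; a blank line separates hunks that belong to different files.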
|
@ -271,7 +271,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
|
||||||
}
|
}
|
||||||
std::unordered_map<string, double> v_count;
|
std::unordered_map<string, double> v_count;
|
||||||
|
|
||||||
/// get line
|
// get line
|
||||||
string str;
|
string str;
|
||||||
vector<string> vstr;
|
vector<string> vstr;
|
||||||
long long prevClsIdx = -1;
|
long long prevClsIdx = -1;
|
||||||
|
@ -422,7 +422,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
|
||||||
long long clsIdx = nbrCls > 0 ? m_class[i] : 0;
|
long long clsIdx = nbrCls > 0 ? m_class[i] : 0;
|
||||||
if (nbrCls > 0 && clsIdx != prevClsIdx)
|
if (nbrCls > 0 && clsIdx != prevClsIdx)
|
||||||
{
|
{
|
||||||
cls2idx(clsIdx, 0) = (ElemType) i; /// the left boundary of clsIdx
|
cls2idx(clsIdx, 0) = (ElemType) i; // the left boundary of clsIdx
|
||||||
prevClsIdx = m_class[i];
|
prevClsIdx = m_class[i];
|
||||||
}
|
}
|
||||||
ofvocab << " " << i << "\t " << m_count[i] << "\t" << m_words[i] << "\t" << clsIdx << std::endl;
|
ofvocab << " " << i << "\t " << m_count[i] << "\t" << m_words[i] << "\t" << clsIdx << std::endl;
|
||||||
|
@ -431,7 +431,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
|
||||||
ofvocab.close();
|
ofvocab.close();
|
||||||
if (nbrCls > 0)
|
if (nbrCls > 0)
|
||||||
{
|
{
|
||||||
/// write the outputs
|
// write the outputs
|
||||||
msra::files::make_intermediate_dirs(s2ws(outputWord2Cls));
|
msra::files::make_intermediate_dirs(s2ws(outputWord2Cls));
|
||||||
ofstream ofp(outputWord2Cls.c_str());
|
ofstream ofp(outputWord2Cls.c_str());
|
||||||
if (!ofp)
|
if (!ofp)
|
||||||
|
|
|
@ -204,7 +204,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN()
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1);
|
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1);
|
||||||
/// unless there is a good algorithm to detect loops, use this explicit setup
|
// unless there is a good algorithm to detect loops, use this explicit setup
|
||||||
output = ApplyNonlinearFunction(
|
output = ApplyNonlinearFunction(
|
||||||
builder.Plus(
|
builder.Plus(
|
||||||
builder.Times(u, input), builder.Times(w, pastValue)),
|
builder.Times(u, input), builder.Times(w, pastValue)),
|
||||||
|
@ -235,7 +235,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN()
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i + 1], 1);
|
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i + 1], 1);
|
||||||
/// unless there is a good algorithm to detect loops, use this explicit setup
|
// unless there is a good algorithm to detect loops, use this explicit setup
|
||||||
output = ApplyNonlinearFunction(
|
output = ApplyNonlinearFunction(
|
||||||
builder.Plus(
|
builder.Plus(
|
||||||
builder.Times(u, input), builder.Times(w, pastValue)),
|
builder.Times(u, input), builder.Times(w, pastValue)),
|
||||||
|
@ -316,7 +316,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1);
|
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1);
|
||||||
/// unless there is a good algorithm to detect loops, use this explicit setup
|
// unless there is a good algorithm to detect loops, use this explicit setup
|
||||||
output = ApplyNonlinearFunction(
|
output = ApplyNonlinearFunction(
|
||||||
builder.Plus(
|
builder.Plus(
|
||||||
builder.Times(u, input), builder.Times(w, pastValue)),
|
builder.Times(u, input), builder.Times(w, pastValue)),
|
||||||
|
@ -346,7 +346,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i + 1], 1);
|
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i + 1], 1);
|
||||||
/// unless there is a good algorithm to detect loops, use this explicit setup
|
// unless there is a good algorithm to detect loops, use this explicit setup
|
||||||
output = ApplyNonlinearFunction(
|
output = ApplyNonlinearFunction(
|
||||||
builder.Plus(
|
builder.Plus(
|
||||||
builder.Times(u, input), builder.Times(w, pastValue)),
|
builder.Times(u, input), builder.Times(w, pastValue)),
|
||||||
|
@ -366,13 +366,13 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// need to have [input_dim x output_dim] matrix
|
// need to have [input_dim x output_dim] matrix
|
||||||
/// e.g., [200 x 10000], where 10000 is the vocabulary size
|
// e.g., [200 x 10000], where 10000 is the vocabulary size
|
||||||
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
/// the label is a dense matrix. each element is the word index
|
// the label is a dense matrix. each element is the word index
|
||||||
label = builder.CreateInputNode(L"labels", 4);
|
label = builder.CreateInputNode(L"labels", 4);
|
||||||
|
|
||||||
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
||||||
|
@ -444,7 +444,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
|
||||||
{
|
{
|
||||||
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
||||||
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
||||||
/// previously used function. now uses LSTMNode which is correct and fast
|
// previously used function. now uses LSTMNode which is correct and fast
|
||||||
input = output;
|
input = output;
|
||||||
for (int i = 1 + offset; i < numHiddenLayers; i++)
|
for (int i = 1 + offset; i < numHiddenLayers; i++)
|
||||||
{
|
{
|
||||||
|
@ -458,7 +458,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// serve as a global bias term
|
// serve as a global bias term
|
||||||
gt = builder.CreateInputNode(L"binaryFeature", m_auxFeatDim);
|
gt = builder.CreateInputNode(L"binaryFeature", m_auxFeatDim);
|
||||||
m_net->FeatureNodes().push_back(gt);
|
m_net->FeatureNodes().push_back(gt);
|
||||||
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"AuxTrans%d", 0),
|
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"AuxTrans%d", 0),
|
||||||
|
@ -468,13 +468,13 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
|
||||||
output = builder.Plus(input, u, L"PlusGlobalBias");
|
output = builder.Plus(input, u, L"PlusGlobalBias");
|
||||||
input = output;
|
input = output;
|
||||||
|
|
||||||
/// need to have [input_dim x output_dim] matrix
|
// need to have [input_dim x output_dim] matrix
|
||||||
/// e.g., [200 x 10000], where 10000 is the vocabulary size
|
// e.g., [200 x 10000], where 10000 is the vocabulary size
|
||||||
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
/// the label is a dense matrix. each element is the word index
|
// the label is a dense matrix. each element is the word index
|
||||||
label = builder.CreateInputNode(L"labels", 4);
|
label = builder.CreateInputNode(L"labels", 4);
|
||||||
|
|
||||||
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
||||||
|
@ -542,7 +542,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFro
|
||||||
}
|
}
|
||||||
|
|
||||||
int recur_idx = 0;
|
int recur_idx = 0;
|
||||||
/// unless there is a good algorithm to detect loops, use this explicit setup
|
// unless there is a good algorithm to detect loops, use this explicit setup
|
||||||
int ik = 1;
|
int ik = 1;
|
||||||
output = input;
|
output = input;
|
||||||
while (ik <= m_maOrder)
|
while (ik <= m_maOrder)
|
||||||
|
@ -675,7 +675,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNeuralProbNetworkFrom
|
||||||
Wxi = builder.CreateLearnableParameter(L"WXI", m_layerSizes[1], m_layerSizes[0]);
|
Wxi = builder.CreateLearnableParameter(L"WXI", m_layerSizes[1], m_layerSizes[0]);
|
||||||
m_net->InitLearnableParameters(Wxi, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(Wxi, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
/// unless there is a good algorithm to detect loops, use this explicit setup
|
// unless there is a good algorithm to detect loops, use this explicit setup
|
||||||
it = builder.Plus(
|
it = builder.Plus(
|
||||||
builder.Tanh(
|
builder.Tanh(
|
||||||
builder.Plus(
|
builder.Plus(
|
||||||
|
@ -994,7 +994,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFrom
|
||||||
outputFromEachLayer[1] = input;
|
outputFromEachLayer[1] = input;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// direct connect from input node to output node
|
// direct connect from input node to output node
|
||||||
|
|
||||||
int recur_idx = 0;
|
int recur_idx = 0;
|
||||||
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
||||||
|
@ -1097,7 +1097,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromD
|
||||||
{
|
{
|
||||||
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
||||||
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
||||||
/// previously used function. now uses LSTMNode which is correct and fast
|
// previously used function. now uses LSTMNode which is correct and fast
|
||||||
input = output;
|
input = output;
|
||||||
for (int i = 1 + offset; i < numHiddenLayers; i++)
|
for (int i = 1 + offset; i < numHiddenLayers; i++)
|
||||||
{
|
{
|
||||||
|
@ -1111,13 +1111,13 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromD
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// need to have [input_dim x output_dim] matrix
|
// need to have [input_dim x output_dim] matrix
|
||||||
/// e.g., [200 x 10000], where 10000 is the vocabulary size
|
// e.g., [200 x 10000], where 10000 is the vocabulary size
|
||||||
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
/// the label is a dense matrix. each element is the word index
|
// the label is a dense matrix. each element is the word index
|
||||||
label = builder.CreateInputNode(L"labels", 4);
|
label = builder.CreateInputNode(L"labels", 4);
|
||||||
|
|
||||||
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
|
||||||
|
@ -1158,16 +1158,16 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilde
|
||||||
size_t nDim = inputDim + outputDim + 2;
|
size_t nDim = inputDim + outputDim + 2;
|
||||||
wInputGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WINPUTGATE%d", iLayer), outputDim, nDim);
|
wInputGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WINPUTGATE%d", iLayer), outputDim, nDim);
|
||||||
m_net->InitLearnableParameters(wInputGate, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(wInputGate, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
wInputGate->Value().ColumnSlice(0, 1).SetValue(m_inputGateInitVal); /// init to input gate bias
|
wInputGate->Value().ColumnSlice(0, 1).SetValue(m_inputGateInitVal); // init to input gate bias
|
||||||
wForgetGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WFORGETGATE%d", iLayer), outputDim, nDim);
|
wForgetGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WFORGETGATE%d", iLayer), outputDim, nDim);
|
||||||
m_net->InitLearnableParameters(wForgetGate, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(wForgetGate, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
wForgetGate->Value().ColumnSlice(0, 1).SetValue(m_forgetGateInitVal); /// init to forget gate bias
|
wForgetGate->Value().ColumnSlice(0, 1).SetValue(m_forgetGateInitVal); // init to forget gate bias
|
||||||
wOutputGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WOUTPUTGATE%d", iLayer), outputDim, nDim);
|
wOutputGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WOUTPUTGATE%d", iLayer), outputDim, nDim);
|
||||||
m_net->InitLearnableParameters(wOutputGate, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(wOutputGate, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
wOutputGate->Value().ColumnSlice(0, 1).SetValue(m_outputGateInitVal); /// init to output gate bias
|
wOutputGate->Value().ColumnSlice(0, 1).SetValue(m_outputGateInitVal); // init to output gate bias
|
||||||
wMemoryCellMatrix = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WMEMORYCELLWEIGHT%d", iLayer), outputDim, inputDim + outputDim + 1);
|
wMemoryCellMatrix = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WMEMORYCELLWEIGHT%d", iLayer), outputDim, inputDim + outputDim + 1);
|
||||||
m_net->InitLearnableParameters(wMemoryCellMatrix, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(wMemoryCellMatrix, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
wMemoryCellMatrix->Value().ColumnSlice(0, 1).SetValue(0); /// init to memory cell bias
|
wMemoryCellMatrix->Value().ColumnSlice(0, 1).SetValue(0); // init to memory cell bias
|
||||||
|
|
||||||
output = builder.LSTM(inputObs, wInputGate, wForgetGate, wOutputGate, wMemoryCellMatrix, msra::strfun::wstrprintf(L"LSTM%d", iLayer));
|
output = builder.LSTM(inputObs, wInputGate, wForgetGate, wOutputGate, wMemoryCellMatrix, msra::strfun::wstrprintf(L"LSTM%d", iLayer));
|
||||||
|
|
||||||
|
@ -1241,7 +1241,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
|
||||||
outputFromEachLayer[1] = input;
|
outputFromEachLayer[1] = input;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// direct connect from input node to output node
|
// direct connect from input node to output node
|
||||||
|
|
||||||
int recur_idx = 0;
|
int recur_idx = 0;
|
||||||
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
||||||
|
@ -1250,7 +1250,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
|
||||||
|
|
||||||
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
||||||
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
|
||||||
/// previously used function. now uses LSTMNode which is correct and fast
|
// previously used function. now uses LSTMNode which is correct and fast
|
||||||
input = output;
|
input = output;
|
||||||
outputFromEachLayer[offset + 1] = input;
|
outputFromEachLayer[offset + 1] = input;
|
||||||
|
|
||||||
|
@ -1543,7 +1543,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDes
|
||||||
outputFromEachLayer[1] = input;
|
outputFromEachLayer[1] = input;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// direct connect from input node to output node
|
// direct connect from input node to output node
|
||||||
|
|
||||||
int recur_idx = 0;
|
int recur_idx = 0;
|
||||||
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
int offset = m_lookupTableOrder > 0 ? 1 : 0;
|
||||||
|
@ -1580,19 +1580,19 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDes
|
||||||
|
|
||||||
for (size_t i = offset; i < m_layerSizes.size(); i++)
|
for (size_t i = offset; i < m_layerSizes.size(); i++)
|
||||||
{
|
{
|
||||||
/// add direct connect from each layers' output to the layer before the output layer
|
// add direct connect from each layers' output to the layer before the output layer
|
||||||
output = BuildDirectConnect(randomSeed, i, (i > 1) ? m_layerSizes[i] : ((offset == 0) ? m_layerSizes[i] : m_layerSizes[i] * m_lookupTableOrder), m_layerSizes[numHiddenLayers], outputFromEachLayer[i], input);
|
output = BuildDirectConnect(randomSeed, i, (i > 1) ? m_layerSizes[i] : ((offset == 0) ? m_layerSizes[i] : m_layerSizes[i] * m_lookupTableOrder), m_layerSizes[numHiddenLayers], outputFromEachLayer[i], input);
|
||||||
if (output != nullptr)
|
if (output != nullptr)
|
||||||
input = output;
|
input = output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// need to have [input_dim x output_dim] matrix
|
// need to have [input_dim x output_dim] matrix
|
||||||
/// e.g., [200 x 10000], where 10000 is the vocabulary size
|
// e.g., [200 x 10000], where 10000 is the vocabulary size
|
||||||
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
// this is for speed-up issue as per word matrix can be simply obtained using column slice
|
||||||
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
|
||||||
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
|
||||||
|
|
||||||
/// the label is a dense matrix. each element is the word index
|
// the label is a dense matrix. each element is the word index
|
||||||
label = builder.CreateInputNode(L"labels", 2 * (this->nce_noises + 1));
|
label = builder.CreateInputNode(L"labels", 2 * (this->nce_noises + 1));
|
||||||
|
|
||||||
bias = builder.CreateLearnableParameter(L"BiasVector", 1, m_layerSizes[m_layerSizes.size() - 1]);
|
bias = builder.CreateLearnableParameter(L"BiasVector", 1, m_layerSizes[m_layerSizes.size() - 1]);
|
||||||
|
|
|
@ -32,7 +32,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
|
|
||||||
enum RNNTYPE
|
enum RNNTYPE
|
||||||
{
|
{
|
||||||
SIMPLENET = 0, /// no recurrent connections
|
SIMPLENET = 0, // no recurrent connections
|
||||||
SIMPLERNN = 1,
|
SIMPLERNN = 1,
|
||||||
LSTM = 2,
|
LSTM = 2,
|
||||||
DEEPRNN = 4,
|
DEEPRNN = 4,
|
||||||
|
@ -147,9 +147,9 @@ public:
|
||||||
|
|
||||||
ConfigArray sSizes = config("streamSizes", "");
|
ConfigArray sSizes = config("streamSizes", "");
|
||||||
m_streamSizes = sSizes;
|
m_streamSizes = sSizes;
|
||||||
sSizes = config("lookupTableOrderSizes", ""); /// this allows having a multiple streams of inputs with
|
sSizes = config("lookupTableOrderSizes", ""); // this allows having a multiple streams of inputs with
|
||||||
/// different lookuptable order sizes. the older one lookupTableOrder is still kept to have backward
|
// different lookuptable order sizes. the older one lookupTableOrder is still kept to have backward
|
||||||
/// support.
|
// support.
|
||||||
m_lookupTabelOrderSizes = sSizes;
|
m_lookupTabelOrderSizes = sSizes;
|
||||||
|
|
||||||
m_labelEmbeddingSize = config("labelEmbeddingSize", "10");
|
m_labelEmbeddingSize = config("labelEmbeddingSize", "10");
|
||||||
|
@ -346,14 +346,14 @@ protected:
|
||||||
TrainingCriterion m_trainCriterion;
|
TrainingCriterion m_trainCriterion;
|
||||||
EvalCriterion m_evalCriterion;
|
EvalCriterion m_evalCriterion;
|
||||||
|
|
||||||
intargvector m_directConnect; /// connect those layers directly in a sequence order
|
intargvector m_directConnect; // connect those layers directly in a sequence order
|
||||||
/// for example: 1:2:3 will connect 1 to 2 and then 2 to 3
|
// for example: 1:2:3 will connect 1 to 2 and then 2 to 3
|
||||||
|
|
||||||
/// recurrent network
|
// recurrent network
|
||||||
intargvector m_recurrentLayers;
|
intargvector m_recurrentLayers;
|
||||||
float m_defaultHiddenActivity;
|
float m_defaultHiddenActivity;
|
||||||
RNNTYPE m_rnnType;
|
RNNTYPE m_rnnType;
|
||||||
int m_maOrder; /// MA model order
|
int m_maOrder; // MA model order
|
||||||
|
|
||||||
bool m_constForgetGateValue;
|
bool m_constForgetGateValue;
|
||||||
bool m_constInputGateValue;
|
bool m_constInputGateValue;
|
||||||
|
@ -363,18 +363,18 @@ protected:
|
||||||
ElemType m_inputGateInitVal;
|
ElemType m_inputGateInitVal;
|
||||||
ElemType m_outputGateInitVal;
|
ElemType m_outputGateInitVal;
|
||||||
|
|
||||||
intargvector m_streamSizes; /// for multiple stream data
|
intargvector m_streamSizes; // for multiple stream data
|
||||||
intargvector m_lookupTabelOrderSizes; /// each stream has its own projection, so need to provide with the lookup table order size for each stream
|
intargvector m_lookupTabelOrderSizes; // each stream has its own projection, so need to provide with the lookup table order size for each stream
|
||||||
|
|
||||||
int m_lookupTableOrder;
|
int m_lookupTableOrder;
|
||||||
int m_labelEmbeddingSize;
|
int m_labelEmbeddingSize;
|
||||||
|
|
||||||
/// these are the file names for word 2 class mapping and class to word index mapping
|
// these are the file names for word 2 class mapping and class to word index mapping
|
||||||
/// these are used for class-based language modeling
|
// these are used for class-based language modeling
|
||||||
string m_cls2index;
|
string m_cls2index;
|
||||||
string m_word2class;
|
string m_word2class;
|
||||||
int m_nbrCls; /// number of classes
|
int m_nbrCls; // number of classes
|
||||||
int m_vocabSize; /// vocabulary size
|
int m_vocabSize; // vocabulary size
|
||||||
int nce_noises;
|
int nce_noises;
|
||||||
|
|
||||||
bool m_sparse_input;
|
bool m_sparse_input;
|
||||||
|
|
|
@ -51,7 +51,7 @@ void DataReader<ElemType>::InitFromConfig(const ConfigRecordType& /*config*/)
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
void DataReader<ElemType>::Destroy()
|
void DataReader<ElemType>::Destroy()
|
||||||
{
|
{
|
||||||
/// newer code that explicitly place multiple streams for inputs
|
// newer code that explicitly place multiple streams for inputs
|
||||||
foreach_index (i, m_ioNames) // inputNames should map to node names
|
foreach_index (i, m_ioNames) // inputNames should map to node names
|
||||||
{
|
{
|
||||||
m_dataReaders[m_ioNames[i]]->Destroy();
|
m_dataReaders[m_ioNames[i]]->Destroy();
|
||||||
|
|
|
@ -935,7 +935,7 @@ protected:
|
||||||
std::vector<ComputationNodeBasePtr> m_finalCriteria;
|
std::vector<ComputationNodeBasePtr> m_finalCriteria;
|
||||||
std::vector<ComputationNodeBasePtr> m_evalNodes;
|
std::vector<ComputationNodeBasePtr> m_evalNodes;
|
||||||
std::vector<ComputationNodeBasePtr> m_outputNodes;
|
std::vector<ComputationNodeBasePtr> m_outputNodes;
|
||||||
std::vector<ComputationNodeBasePtr> m_pairNodes; /// nodes for the children network to pair
|
std::vector<ComputationNodeBasePtr> m_pairNodes; // nodes for the children network to pair
|
||||||
vector<std::vector<ComputationNodeBasePtr>*> GetAllNodeGroups() // get all groups to allow to iterate over all of them ...continue
|
vector<std::vector<ComputationNodeBasePtr>*> GetAllNodeGroups() // get all groups to allow to iterate over all of them ...continue
|
||||||
{
|
{
|
||||||
return vector<std::vector<ComputationNodeBasePtr>*>{&m_features, &m_labels, &m_finalCriteria, &m_evalNodes, &m_outputNodes, &m_pairNodes};
|
return vector<std::vector<ComputationNodeBasePtr>*>{&m_features, &m_labels, &m_finalCriteria, &m_evalNodes, &m_outputNodes, &m_pairNodes};
|
||||||
|
|
|
@ -1576,7 +1576,7 @@ public:
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
/// these two are used to pass gradients from future minibatch
|
// these two are used to pass gradients from future minibatch
|
||||||
virtual void GetErrorsToPreviousMinibatch(Matrix<ElemType>&)
|
virtual void GetErrorsToPreviousMinibatch(Matrix<ElemType>&)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
|
@ -176,7 +176,7 @@ public:
|
||||||
static void DecideStartEndingOutputLab(const Matrix<ElemType>& lbls, int& stt, int& stp)
|
static void DecideStartEndingOutputLab(const Matrix<ElemType>& lbls, int& stt, int& stp)
|
||||||
{
|
{
|
||||||
if (stt != -1 && stp != -1)
|
if (stt != -1 && stp != -1)
|
||||||
return; /// have computed before
|
return; // have computed before
|
||||||
|
|
||||||
int iNumPos = lbls.GetNumCols();
|
int iNumPos = lbls.GetNumCols();
|
||||||
|
|
||||||
|
@ -214,7 +214,7 @@ public:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// compute posterior probability of label y at position t
|
// compute posterior probability of label y at position t
|
||||||
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
|
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
|
||||||
{
|
{
|
||||||
DecideStartEndingOutputLab(Input(0)->Value(), mStartLab, mEndLab);
|
DecideStartEndingOutputLab(Input(0)->Value(), mStartLab, mEndLab);
|
||||||
|
@ -225,27 +225,27 @@ public:
|
||||||
// compute forward backward algorithm
|
// compute forward backward algorithm
|
||||||
void ForwardPropS(Matrix<ElemType>& alpha, Matrix<ElemType>& backtrace, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const size_t stt, const size_t stp)
|
void ForwardPropS(Matrix<ElemType>& alpha, Matrix<ElemType>& backtrace, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const size_t stt, const size_t stp)
|
||||||
{
|
{
|
||||||
/// to-do, each slice is for one sentence
|
// to-do, each slice is for one sentence
|
||||||
/// to-do, number of slices correspond to number of frames
|
// to-do, number of slices correspond to number of frames
|
||||||
/// this implementation only supports one sentence per minibatch
|
// this implementation only supports one sentence per minibatch
|
||||||
|
|
||||||
/// change to other values so can support multiple sentences in each minibatch
|
// change to other values so can support multiple sentences in each minibatch
|
||||||
ForwardCompute(alpha, backtrace, pos_scores, pair_scores, stt);
|
ForwardCompute(alpha, backtrace, pos_scores, pair_scores, stt);
|
||||||
BackwardCompute(functionValues, backtrace, stp);
|
BackwardCompute(functionValues, backtrace, stp);
|
||||||
};
|
};
|
||||||
|
|
||||||
/// compute forward backward algorithm
|
// compute forward backward algorithm
|
||||||
static void ForwardCompute(Matrix<ElemType>& alpha,
|
static void ForwardCompute(Matrix<ElemType>& alpha,
|
||||||
Matrix<ElemType>& backtrace,
|
Matrix<ElemType>& backtrace,
|
||||||
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores,
|
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores,
|
||||||
const size_t stt)
|
const size_t stt)
|
||||||
{
|
{
|
||||||
/// to-do, shift more than 1 to support muliple sentences per minibatch
|
// to-do, shift more than 1 to support muliple sentences per minibatch
|
||||||
int iNumPos = pos_scores.GetNumCols();
|
int iNumPos = pos_scores.GetNumCols();
|
||||||
int iNumLab = pos_scores.GetNumRows();
|
int iNumLab = pos_scores.GetNumRows();
|
||||||
size_t iTmp = 0;
|
size_t iTmp = 0;
|
||||||
|
|
||||||
/// need to have
|
// need to have
|
||||||
alpha.Resize(iNumLab, iNumPos);
|
alpha.Resize(iNumLab, iNumPos);
|
||||||
backtrace.Resize(iNumLab, iNumPos);
|
backtrace.Resize(iNumLab, iNumPos);
|
||||||
|
|
||||||
|
@ -265,11 +265,11 @@ public:
|
||||||
iTmp = j;
|
iTmp = j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fTmp += pos_scores(k, t); /// include position dependent score
|
fTmp += pos_scores(k, t); // include position dependent score
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// with constrain that the first word is labeled as a given symbol
|
// with constrain that the first word is labeled as a given symbol
|
||||||
iTmp = stt;
|
iTmp = stt;
|
||||||
fTmp = 0;
|
fTmp = 0;
|
||||||
if (t == 1)
|
if (t == 1)
|
||||||
|
@ -289,7 +289,7 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// compute backward algorithm
|
// compute backward algorithm
|
||||||
static void BackwardCompute(
|
static void BackwardCompute(
|
||||||
Matrix<ElemType>& decodedpath,
|
Matrix<ElemType>& decodedpath,
|
||||||
const Matrix<ElemType>& backtrace, const size_t stp)
|
const Matrix<ElemType>& backtrace, const size_t stp)
|
||||||
|
@ -310,8 +310,8 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// need to feed in pseudo label data, which tells the decoder what is the beginning
|
// need to feed in pseudo label data, which tells the decoder what is the beginning
|
||||||
/// and ending output symbol. these symbols will constrain the search space
|
// and ending output symbol. these symbols will constrain the search space
|
||||||
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
|
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
|
||||||
{
|
{
|
||||||
Base::Validate(isFinalValidationPass);
|
Base::Validate(isFinalValidationPass);
|
||||||
|
|
|
@ -530,7 +530,7 @@ public:
|
||||||
|
|
||||||
ForwardProp(FrameRange(m_pMBLayout));
|
ForwardProp(FrameRange(m_pMBLayout));
|
||||||
|
|
||||||
/// check with expected values
|
// check with expected values
|
||||||
Value().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
|
Value().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
|
||||||
if (!ISCLOSE(Value()(0, 0), 1.0, EPSILON) ||
|
if (!ISCLOSE(Value()(0, 0), 1.0, EPSILON) ||
|
||||||
!ISCLOSE(Value()(0, 1), 2.0, EPSILON) ||
|
!ISCLOSE(Value()(0, 1), 2.0, EPSILON) ||
|
||||||
|
@ -550,7 +550,7 @@ public:
|
||||||
BackpropTo(i, FrameRange(m_pMBLayout));
|
BackpropTo(i, FrameRange(m_pMBLayout));
|
||||||
|
|
||||||
// check with expected values
|
// check with expected values
|
||||||
if (!ISCLOSE(Input(1)->Gradient()(0, 0), 2, EPSILON) /// bi
|
if (!ISCLOSE(Input(1)->Gradient()(0, 0), 2, EPSILON) // bi
|
||||||
|| !ISCLOSE(Input(1)->Gradient()(0, 1), 2, EPSILON) // Wxi
|
|| !ISCLOSE(Input(1)->Gradient()(0, 1), 2, EPSILON) // Wxi
|
||||||
|| !ISCLOSE(Input(1)->Gradient()(1, 0), 2, EPSILON) // Whi
|
|| !ISCLOSE(Input(1)->Gradient()(1, 0), 2, EPSILON) // Whi
|
||||||
|| !ISCLOSE(Input(1)->Gradient()(2, 1), 2, EPSILON) // Wci
|
|| !ISCLOSE(Input(1)->Gradient()(2, 1), 2, EPSILON) // Wci
|
||||||
|
|
|
@ -1003,9 +1003,9 @@ protected:
|
||||||
Matrix<ElemType> m_clsLogSoftmax;
|
Matrix<ElemType> m_clsLogSoftmax;
|
||||||
Matrix<ElemType> m_clsSoftmax;
|
Matrix<ElemType> m_clsSoftmax;
|
||||||
|
|
||||||
/// gradient of cross entropy with respect to the input of softmax
|
// gradient of cross entropy with respect to the input of softmax
|
||||||
/// a 1 row by \sum_t m_nbrWordsInEachTime[t] vector
|
// a 1 row by \sum_t m_nbrWordsInEachTime[t] vector
|
||||||
/// one slice of size m_nbrWordsInEachTime[t] saves the input to softmax for word y_t
|
// one slice of size m_nbrWordsInEachTime[t] saves the input to softmax for word y_t
|
||||||
Matrix<ElemType> m_grdToSoftMaxInput;
|
Matrix<ElemType> m_grdToSoftMaxInput;
|
||||||
bool m_needRecomputeGradientToSoftmaxInput;
|
bool m_needRecomputeGradientToSoftmaxInput;
|
||||||
|
|
||||||
|
@ -1061,7 +1061,7 @@ public:
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
/// compute posterior probability of label y at position t
|
// compute posterior probability of label y at position t
|
||||||
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
|
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
|
||||||
{
|
{
|
||||||
FrameRange fr(Input(0)->GetMBLayout());
|
FrameRange fr(Input(0)->GetMBLayout());
|
||||||
|
@ -1136,13 +1136,13 @@ public:
|
||||||
// compute forward backward algorithm
|
// compute forward backward algorithm
|
||||||
/*TODO: merge with call site*/ void ForwardPropS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
|
/*TODO: merge with call site*/ void ForwardPropS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
|
||||||
{
|
{
|
||||||
/// to-do, each slice is for one sentence
|
// to-do, each slice is for one sentence
|
||||||
/// to-do, number of slices correspond to number of frames
|
// to-do, number of slices correspond to number of frames
|
||||||
/// this implementation only supports one sentence per minibatch
|
// this implementation only supports one sentence per minibatch
|
||||||
|
|
||||||
int nObs = lbls.GetNumCols();
|
int nObs = lbls.GetNumCols();
|
||||||
|
|
||||||
/// change to other values so can support multiple sentences in each minibatch
|
// change to other values so can support multiple sentences in each minibatch
|
||||||
assert(iStep == 1);
|
assert(iStep == 1);
|
||||||
ForwardCompute(alpha, lbls, pos_scores, pair_scores);
|
ForwardCompute(alpha, lbls, pos_scores, pair_scores);
|
||||||
BackwardCompute(alpha, beta, functionValues, lbls, pos_scores, pair_scores, iStep);
|
BackwardCompute(alpha, beta, functionValues, lbls, pos_scores, pair_scores, iStep);
|
||||||
|
@ -1170,7 +1170,7 @@ public:
|
||||||
ElemType fAlpha;
|
ElemType fAlpha;
|
||||||
fAlpha = a.LogAddSumOfElements();
|
fAlpha = a.LogAddSumOfElements();
|
||||||
|
|
||||||
/// transition score
|
// transition score
|
||||||
ElemType tscore = 0;
|
ElemType tscore = 0;
|
||||||
for (int t = 0; t < nObs - 1; t++)
|
for (int t = 0; t < nObs - 1; t++)
|
||||||
{
|
{
|
||||||
|
@ -1190,19 +1190,19 @@ public:
|
||||||
}
|
}
|
||||||
tscore += pair_scores(j, i);
|
tscore += pair_scores(j, i);
|
||||||
}
|
}
|
||||||
tscore += functionValues.Get00Element(); /// correct path score
|
tscore += functionValues.Get00Element(); // correct path score
|
||||||
tscore -= fAlpha; /// reduced by the scores from all paths
|
tscore -= fAlpha; // reduced by the scores from all paths
|
||||||
functionValues.SetValue(tscore);
|
functionValues.SetValue(tscore);
|
||||||
|
|
||||||
functionValues *= (-1);
|
functionValues *= (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// compute forward backward algorithm
|
// compute forward backward algorithm
|
||||||
static void ForwardCompute(Matrix<ElemType>& alpha,
|
static void ForwardCompute(Matrix<ElemType>& alpha,
|
||||||
const Matrix<ElemType>& lbls,
|
const Matrix<ElemType>& lbls,
|
||||||
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores)
|
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores)
|
||||||
{
|
{
|
||||||
/// to-do, shift more than 1 to support muliple sentences per minibatch
|
// to-do, shift more than 1 to support muliple sentences per minibatch
|
||||||
int iNumPos = lbls.GetNumCols();
|
int iNumPos = lbls.GetNumCols();
|
||||||
int iNumLab = lbls.GetNumRows();
|
int iNumLab = lbls.GetNumRows();
|
||||||
|
|
||||||
|
@ -1214,7 +1214,7 @@ public:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// need to have
|
// need to have
|
||||||
alpha.Resize(iNumLab, iNumPos);
|
alpha.Resize(iNumLab, iNumPos);
|
||||||
|
|
||||||
for (int t = 0; t < iNumPos; t++)
|
for (int t = 0; t < iNumPos; t++)
|
||||||
|
@ -1229,13 +1229,13 @@ public:
|
||||||
fAlpha = alpha(j, t - 1);
|
fAlpha = alpha(j, t - 1);
|
||||||
fTmp = alpha.LogAdd(fTmp, fAlpha + pair_scores(k, j));
|
fTmp = alpha.LogAdd(fTmp, fAlpha + pair_scores(k, j));
|
||||||
}
|
}
|
||||||
fTmp += pos_scores(k, t); /// include position dependent score
|
fTmp += pos_scores(k, t); // include position dependent score
|
||||||
alpha(k, t) = fTmp;
|
alpha(k, t) = fTmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// compute backward algorithm
|
// compute backward algorithm
|
||||||
static void BackwardCompute(const Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
|
static void BackwardCompute(const Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
|
||||||
Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
|
Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
|
||||||
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const int shift = 1)
|
const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const int shift = 1)
|
||||||
|
@ -1263,7 +1263,7 @@ public:
|
||||||
startLbl, shift);
|
startLbl, shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// compute forward backward algorithm
|
// compute forward backward algorithm
|
||||||
static void PostProbCompute(Matrix<ElemType>& postprob, const Matrix<ElemType>& alpha, const Matrix<ElemType>& beta)
|
static void PostProbCompute(Matrix<ElemType>& postprob, const Matrix<ElemType>& alpha, const Matrix<ElemType>& beta)
|
||||||
{
|
{
|
||||||
int iNumPos = alpha.GetNumCols();
|
int iNumPos = alpha.GetNumCols();
|
||||||
|
|
|
@ -5359,7 +5359,7 @@ void CPUMatrix<ElemType>::RCRFTransGrdCompute(const CPUMatrix<ElemType>& lbls,
|
||||||
_rcrfTransGrdCompute(i, lbls, alpha, beta, pair_scores, grd, tPos);
|
_rcrfTransGrdCompute(i, lbls, alpha, beta, pair_scores, grd, tPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// transition score
|
// transition score
|
||||||
int i = -1;
|
int i = -1;
|
||||||
if (tPos == 0)
|
if (tPos == 0)
|
||||||
i = firstLbl;
|
i = firstLbl;
|
||||||
|
@ -5394,7 +5394,7 @@ void CPUMatrix<ElemType>::_rcrfTransGrdCompute(size_t i,
|
||||||
const CPUMatrix<ElemType>& beta,
|
const CPUMatrix<ElemType>& beta,
|
||||||
const CPUMatrix<ElemType>& pair_scores,
|
const CPUMatrix<ElemType>& pair_scores,
|
||||||
CPUMatrix<ElemType>& grd,
|
CPUMatrix<ElemType>& grd,
|
||||||
const size_t tPos /// position
|
const size_t tPos // position
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
int iNumLab = (int) alpha.GetNumRows();
|
int iNumLab = (int) alpha.GetNumRows();
|
||||||
|
|
|
@ -370,7 +370,7 @@ public:
|
||||||
static CPUMatrix<ElemType> RandomUniform(const size_t rows, const size_t cols, const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
|
static CPUMatrix<ElemType> RandomUniform(const size_t rows, const size_t cols, const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
|
||||||
static CPUMatrix<ElemType> RandomGaussian(const size_t rows, const size_t cols, const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
|
static CPUMatrix<ElemType> RandomGaussian(const size_t rows, const size_t cols, const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
|
||||||
|
|
||||||
/// return true if v is an element in matrix c
|
// return true if v is an element in matrix c
|
||||||
static bool HasElement(const CPUMatrix<ElemType>& a, const ElemType v = 0.0);
|
static bool HasElement(const CPUMatrix<ElemType>& a, const ElemType v = 0.0);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -426,7 +426,7 @@ public:
|
||||||
ElemType LogAddSumOfElements() const;
|
ElemType LogAddSumOfElements() const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// for RCRF
|
// for RCRF
|
||||||
static void RCRFBackwardCompute(const CPUMatrix<ElemType>& alpha, CPUMatrix<ElemType>& beta,
|
static void RCRFBackwardCompute(const CPUMatrix<ElemType>& alpha, CPUMatrix<ElemType>& beta,
|
||||||
const CPUMatrix<ElemType>& lbls,
|
const CPUMatrix<ElemType>& lbls,
|
||||||
const CPUMatrix<ElemType>& pair_scores);
|
const CPUMatrix<ElemType>& pair_scores);
|
||||||
|
@ -446,7 +446,7 @@ public:
|
||||||
const CPUMatrix<ElemType>& beta,
|
const CPUMatrix<ElemType>& beta,
|
||||||
const CPUMatrix<ElemType>& pair_scores,
|
const CPUMatrix<ElemType>& pair_scores,
|
||||||
CPUMatrix<ElemType>& grd,
|
CPUMatrix<ElemType>& grd,
|
||||||
const size_t tPos /// position
|
const size_t tPos // position
|
||||||
);
|
);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -97,7 +97,7 @@ public:
|
||||||
|
|
||||||
static bool AreEqual(const CPUSparseMatrix<ElemType>& a, const CPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
|
static bool AreEqual(const CPUSparseMatrix<ElemType>& a, const CPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
|
||||||
|
|
||||||
/// sum(vec(a).*vec(b))
|
// sum(vec(a).*vec(b))
|
||||||
static ElemType InnerProductOfMatrices(const CPUSparseMatrix<ElemType>& /*a*/, const CPUMatrix<ElemType>& /*b*/)
|
static ElemType InnerProductOfMatrices(const CPUSparseMatrix<ElemType>& /*a*/, const CPUMatrix<ElemType>& /*b*/)
|
||||||
{
|
{
|
||||||
NOT_IMPLEMENTED;
|
NOT_IMPLEMENTED;
|
||||||
|
|
|
@ -419,7 +419,7 @@ public:
|
||||||
|
|
||||||
static void AddElementToElement(const GPUMatrix<ElemType>& a, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
static void AddElementToElement(const GPUMatrix<ElemType>& a, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj);
|
||||||
|
|
||||||
/// minus one at a specific position
|
// minus one at a specific position
|
||||||
static void MinusOneAt(GPUMatrix<ElemType>& c, const size_t position);
|
static void MinusOneAt(GPUMatrix<ElemType>& c, const size_t position);
|
||||||
|
|
||||||
static void Scale(ElemType alpha, const GPUMatrix<ElemType>& a, GPUMatrix<ElemType>& c);
|
static void Scale(ElemType alpha, const GPUMatrix<ElemType>& a, GPUMatrix<ElemType>& c);
|
||||||
|
@ -478,7 +478,7 @@ public:
|
||||||
const GPUMatrix<ElemType>& beta,
|
const GPUMatrix<ElemType>& beta,
|
||||||
const GPUMatrix<ElemType>& pair_scores,
|
const GPUMatrix<ElemType>& pair_scores,
|
||||||
GPUMatrix<ElemType>& grd,
|
GPUMatrix<ElemType>& grd,
|
||||||
const int startLbl, /// the time 0 start symbol in the output layer
|
const int startLbl, // the time 0 start symbol in the output layer
|
||||||
const int shift);
|
const int shift);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -1345,7 +1345,7 @@ template <class ElemType>
|
||||||
__global__ void _hasElement(
|
__global__ void _hasElement(
|
||||||
const ElemType* a,
|
const ElemType* a,
|
||||||
const CUDA_LONG N,
|
const CUDA_LONG N,
|
||||||
ElemType* d_res /// [2x1] vector. The first is the value to be compared and the second is the 0/1 to return
|
ElemType* d_res // [2x1] vector. The first is the value to be compared and the second is the 0/1 to return
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
|
CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
@ -4594,15 +4594,15 @@ __global__ void _minusOneAt(
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
__global__ void _rcrfBackwardCompute(
|
__global__ void _rcrfBackwardCompute(
|
||||||
const size_t iNumPos,
|
const size_t iNumPos,
|
||||||
const ElemType* galpha, /// column slice at current time t
|
const ElemType* galpha, // column slice at current time t
|
||||||
ElemType* gbeta, /// column slices with [row, 2] at current time t for [
|
ElemType* gbeta, // column slices with [row, 2] at current time t for [
|
||||||
const ElemType* gpair_scores,
|
const ElemType* gpair_scores,
|
||||||
const size_t iNumLab, const int shift)
|
const size_t iNumLab, const int shift)
|
||||||
{
|
{
|
||||||
int id = blockDim.x * blockIdx.x + threadIdx.x;
|
int id = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
|
extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
|
||||||
/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
|
// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
|
||||||
|
|
||||||
ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
|
ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
|
||||||
ElemType* pair_scores = alpha + iNumPos * iNumLab;
|
ElemType* pair_scores = alpha + iNumPos * iNumLab;
|
||||||
|
@ -4611,7 +4611,7 @@ __global__ void _rcrfBackwardCompute(
|
||||||
if (id < 0 || id >= iNumLab)
|
if (id < 0 || id >= iNumLab)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/// copy global memory to shared memory to save time
|
// copy global memory to shared memory to save time
|
||||||
for (int t = iNumPos - 1; t >= 0; t--)
|
for (int t = iNumPos - 1; t >= 0; t--)
|
||||||
{
|
{
|
||||||
alpha[IDX2C(id, t, iNumLab)] = galpha[IDX2C(id, t, iNumLab)];
|
alpha[IDX2C(id, t, iNumLab)] = galpha[IDX2C(id, t, iNumLab)];
|
||||||
|
@ -4654,7 +4654,7 @@ __global__ void _rcrfBackwardCompute(
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// copy from shared memory to global memory to pass values
|
// copy from shared memory to global memory to pass values
|
||||||
for (int t = iNumPos - 1; t >= 0; t--)
|
for (int t = iNumPos - 1; t >= 0; t--)
|
||||||
{
|
{
|
||||||
gbeta[IDX2C(id, t, iNumLab)] = beta[IDX2C(id, t, iNumLab)];
|
gbeta[IDX2C(id, t, iNumLab)] = beta[IDX2C(id, t, iNumLab)];
|
||||||
|
@ -4666,18 +4666,18 @@ __global__ void _rcrfBackwardCompute(
|
||||||
/// assume a column slice of input and output
|
/// assume a column slice of input and output
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
__global__ void _rcrfBackwardCompute(
|
__global__ void _rcrfBackwardCompute(
|
||||||
const size_t t, /// time position
|
const size_t t, // time position
|
||||||
const size_t iNumPos,
|
const size_t iNumPos,
|
||||||
const ElemType* galpha, /// column slice at current time t
|
const ElemType* galpha, // column slice at current time t
|
||||||
ElemType* gbeta, /// column slices with [row, 2] at current time t for [
|
ElemType* gbeta, // column slices with [row, 2] at current time t for [
|
||||||
const ElemType* gzeta, /// column slices with [row, 2] at current time t for [
|
const ElemType* gzeta, // column slices with [row, 2] at current time t for [
|
||||||
const ElemType* gpair_scores, /// column slice at current time t
|
const ElemType* gpair_scores, // column slice at current time t
|
||||||
const size_t iNumLab, const int shift)
|
const size_t iNumLab, const int shift)
|
||||||
{
|
{
|
||||||
int id = blockDim.x * blockIdx.x + threadIdx.x;
|
int id = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
|
extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
|
||||||
/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
|
// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
|
||||||
|
|
||||||
ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
|
ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
|
||||||
ElemType* beta_t1 = (ElemType*) (alpha + iNumLab);
|
ElemType* beta_t1 = (ElemType*) (alpha + iNumLab);
|
||||||
|
@ -4687,7 +4687,7 @@ __global__ void _rcrfBackwardCompute(
|
||||||
if (id < 0 || id >= iNumLab)
|
if (id < 0 || id >= iNumLab)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/// copy global memory to shared memory to save time
|
// copy global memory to shared memory to save time
|
||||||
alpha[id] = galpha[IDX2C(id, t, iNumLab)];
|
alpha[id] = galpha[IDX2C(id, t, iNumLab)];
|
||||||
if (t < iNumPos - 1)
|
if (t < iNumPos - 1)
|
||||||
beta_t1[id] = gbeta[IDX2C(id, t + 1, iNumLab)];
|
beta_t1[id] = gbeta[IDX2C(id, t + 1, iNumLab)];
|
||||||
|
@ -4717,17 +4717,17 @@ __global__ void _rcrfBackwardCompute(
|
||||||
/// $\zeta_t(j) = {\sum_k exp(\delta_{t-1}(k) + a_{kj}(t))}$.
|
/// $\zeta_t(j) = {\sum_k exp(\delta_{t-1}(k) + a_{kj}(t))}$.
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
__global__ void _rcrfBackwardComputeZeta(
|
__global__ void _rcrfBackwardComputeZeta(
|
||||||
const size_t t, /// time position
|
const size_t t, // time position
|
||||||
const size_t iNumPos,
|
const size_t iNumPos,
|
||||||
const ElemType* galpha, /// column slice at current time t
|
const ElemType* galpha, // column slice at current time t
|
||||||
ElemType* gzeta, /// column slices with [row, 2] at current time t for [
|
ElemType* gzeta, // column slices with [row, 2] at current time t for [
|
||||||
const ElemType* gpair_scores,
|
const ElemType* gpair_scores,
|
||||||
const size_t iNumLab, const int shift)
|
const size_t iNumLab, const int shift)
|
||||||
{
|
{
|
||||||
int id = blockDim.x * blockIdx.x + threadIdx.x;
|
int id = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
|
extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
|
||||||
/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
|
// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
|
||||||
|
|
||||||
ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
|
ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
|
||||||
ElemType pair_scores[1024];
|
ElemType pair_scores[1024];
|
||||||
|
@ -4735,7 +4735,7 @@ __global__ void _rcrfBackwardComputeZeta(
|
||||||
if (id < 0 || id >= iNumLab)
|
if (id < 0 || id >= iNumLab)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/// copy global memory to shared memory to save time
|
// copy global memory to shared memory to save time
|
||||||
alpha[id] = galpha[IDX2C(id, t, iNumLab)];
|
alpha[id] = galpha[IDX2C(id, t, iNumLab)];
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
@ -4758,10 +4758,10 @@ __global__ void _rcrfBackwardComputeZeta(
|
||||||
/// $\zeta_t(j) = {\sum_k exp(\delta_{t-1}(k) + a_{kj}(t))}$.
|
/// $\zeta_t(j) = {\sum_k exp(\delta_{t-1}(k) + a_{kj}(t))}$.
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
__global__ void _rcrfTransGrdComputeZeta(
|
__global__ void _rcrfTransGrdComputeZeta(
|
||||||
const int t, /// time position
|
const int t, // time position
|
||||||
const size_t iNumPos,
|
const size_t iNumPos,
|
||||||
const ElemType* galpha, /// column slice at current time t
|
const ElemType* galpha, // column slice at current time t
|
||||||
ElemType* gzeta, /// column slices with [row, 2] at current time t for [
|
ElemType* gzeta, // column slices with [row, 2] at current time t for [
|
||||||
const ElemType* gpair_scores,
|
const ElemType* gpair_scores,
|
||||||
const size_t iNumLab,
|
const size_t iNumLab,
|
||||||
const size_t start_lbl,
|
const size_t start_lbl,
|
||||||
|
@@ -4769,8 +4769,8 @@ __global__ void _rcrfTransGrdComputeZeta(
 {
     int id = blockDim.x * blockIdx.x + threadIdx.x;

-    extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
-    /// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
+    extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
+    // need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)

     ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
     ElemType pair_scores[1024];

@@ -4778,7 +4778,7 @@ __global__ void _rcrfTransGrdComputeZeta(
     if (id < 0 || id >= iNumLab)
         return;

-    /// copy global memory to shared memory to save time
+    // copy global memory to shared memory to save time
     if (t >= 0)
         alpha[id] = galpha[IDX2C(id, t, iNumLab)];

@@ -4823,8 +4823,8 @@ __global__ void _rcrfTransGrdCompute(
 {
     int id = blockDim.x * blockIdx.x + threadIdx.x;

-    extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
-    /// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
+    extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
+    // need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)

     ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
     ElemType* beta = (ElemType*) (alpha + iNumLab);
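The repeated "need byte size" comments matter because `extern __shared__` arrays are unsized in the kernel; the byte count is supplied as the third launch parameter. A minimal, hypothetical CUDA sketch (not code from this commit) of the pattern:

    #include <cuda_runtime.h>

    // Hypothetical kernel: the shared array is declared with [], not *, and
    // sub-buffers are carved out by pointer arithmetic, as in the kernels above.
    __global__ void ZetaLikeKernel(double* out, const int iNumLab)
    {
        extern __shared__ double sh_alpha_and_beta[];
        double* alpha = sh_alpha_and_beta; // first iNumLab doubles
        double* beta = alpha + iNumLab;    // next iNumLab doubles

        int id = blockDim.x * blockIdx.x + threadIdx.x;
        if (id >= iNumLab)
            return;
        alpha[id] = id;  // stand-ins for the global-to-shared copies
        beta[id] = -id;
        __syncthreads();
        out[id] = alpha[id] + beta[id];
    }

    // host side: the third <<<...>>> argument is the dynamic shared-memory byte count
    // ZetaLikeKernel<<<1, 256, 2 * iNumLab * sizeof(double)>>>(d_out, iNumLab);
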
@@ -4834,7 +4834,7 @@ __global__ void _rcrfTransGrdCompute(
     if (id < 0 || id >= iNumLab)
         return;

-    /// copy global memory to shared memory to save time
+    // copy global memory to shared memory to save time
     if (t > 0)
         alpha[id] = galpha[IDX2C(id, t - 1, iNumLab)];
     beta[id] = gbeta[IDX2C(id, t, iNumLab)];

@@ -4897,7 +4897,7 @@ __global__ void _reductionLogAddSum(

     __syncthreads();

-    /// do reduction on the shared memory
+    // do reduction on the shared memory
     size_t start_width = ceil((N + 0.0) / 2.0);
     for (size_t s = start_width; s > 0; s >>= 1)
     {
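The loop above halves the active width each pass, combining pairs in log space. A self-contained, hypothetical sketch of the same pattern (not the commit's code; padding N up to a power of two keeps the `s >>= 1` halving exact):

    #include <cuda_runtime.h>
    #include <math.h>

    __device__ double LogAdd(double x, double y) // log(exp(x) + exp(y)), stably
    {
        if (x < y) { double t = x; x = y; y = t; } // ensure x >= y
        return x + log1p(exp(y - x));
    }

    // launch with blockDim.x == m and shared bytes == m * sizeof(double),
    // where m is N rounded up to a power of two
    __global__ void ReductionLogAddSum(const double* in, double* out, size_t N)
    {
        extern __shared__ double partial[];
        size_t tid = threadIdx.x;
        partial[tid] = (tid < N) ? in[tid] : -1e300; // pad with ~log(0)
        __syncthreads();

        for (size_t s = blockDim.x / 2; s > 0; s >>= 1)
        {
            if (tid < s)
                partial[tid] = LogAdd(partial[tid], partial[tid + s]);
            __syncthreads();
        }
        if (tid == 0)
            *out = partial[0];
    }
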
@@ -548,7 +548,7 @@ public:
     const Matrix<ElemType>& beta,
     const Matrix<ElemType>& pair_scores,
     Matrix<ElemType>& grd,
-    const int startLbl, /// the time 0 start symbol in the output layer
+    const int startLbl, // the time 0 start symbol in the output layer
     const int shift);

 template <typename T>
@@ -35,7 +35,7 @@ private:
     // this is used to prevent CUDA out-of memory errors

     vector<size_t> m_numFramesToProcess; // [seq index] number of frames available (left to return) in each parallel sequence
-    vector<size_t> m_switchFrame; /// TODO: something like the position where a new sequence starts; still supported?
+    vector<size_t> m_switchFrame; // TODO: something like the position where a new sequence starts; still supported?
     vector<size_t> m_numValidFrames; // [seq index] valid #frames in each parallel sequence. Frames (s, t) with t >= m_numValidFrames[s] are NoInput.
     vector<size_t> m_extraSeqsPerMB;
     size_t m_extraNumSeqs;

@@ -132,10 +132,10 @@ private:
 public:
     MBLayoutPtr m_pMBLayout;

-    /// by default it is false
-    /// if true, reader will set to ((int) MinibatchPackingFlags::None) for time positions that are orignally correspond to ((int) MinibatchPackingFlags::SequenceStart)
-    /// set to true so that a current minibatch can uses state activities from the previous minibatch.
-    /// default will have truncated BPTT, which only does BPTT inside a minibatch
+    // by default it is false
+    // if true, reader will set to ((int) MinibatchPackingFlags::None) for time positions that are orignally correspond to ((int) MinibatchPackingFlags::SequenceStart)
+    // set to true so that a current minibatch can uses state activities from the previous minibatch.
+    // default will have truncated BPTT, which only does BPTT inside a minibatch
     bool mIgnoreSentenceBeginTag;
     // TODO: this ^^ does not seem to belong here.

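All four comment lines describe `mIgnoreSentenceBeginTag`. A hypothetical one-line restatement of the documented behavior (flag names quoted from the comment; not code from the commit):

    // at a position t that originally marks a sequence start:
    // packing(t) = mIgnoreSentenceBeginTag
    //     ? MinibatchPackingFlags::None          // carry state over from the previous minibatch
    //     : MinibatchPackingFlags::SequenceStart; // reset state: truncated BPTT inside the minibatch
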
@@ -160,10 +160,10 @@ private:
 public:
     MBLayoutPtr m_pMBLayout;

-    /// by default it is false
-    /// if true, reader will set to SEQUENCE_MIDDLE for time positions that are orignally correspond to SEQUENCE_START
-    /// set to true so that a current minibatch can uses state activities from the previous minibatch.
-    /// default will have truncated BPTT, which only does BPTT inside a minibatch
+    // by default it is false
+    // if true, reader will set to SEQUENCE_MIDDLE for time positions that are orignally correspond to SEQUENCE_START
+    // set to true so that a current minibatch can uses state activities from the previous minibatch.
+    // default will have truncated BPTT, which only does BPTT inside a minibatch
     bool mIgnoreSentenceBeginTag;
     HTKMLFReader()
         : m_pMBLayout(make_shared<MBLayout>())
@@ -576,7 +576,7 @@ public:
     long orgRecordCount = (long) labels->size();
     long lineCount = 0;
     SequencePosition sequencePositionLast(0, 0, seqFlagNull);
-    /// get line
+    // get line
     char ch2[MAXSTRING];
     if (mFile == nullptr)
         Microsoft::MSR::CNTK::RuntimeError("File %ls can not be loaded\n", mFileName.c_str());
@@ -157,7 +157,7 @@ bool SequenceReader<ElemType>::EnsureDataAvailable(size_t mbStartSample, bool /*
     bSentenceStart = true;

     // loop through the labels for this entry
-    while (label < spos.labelPos) /// need to minus one since
+    while (label < spos.labelPos) // need to minus one since
     {

     // labelIn should be a category label

@@ -184,7 +184,7 @@ bool SequenceReader<ElemType>::EnsureDataAvailable(size_t mbStartSample, bool /*
     }

     if (!_stricmp(labelValue.c_str(), m_labelInfo[labelInfoIn].endSequence.c_str()))
-        continue; /// ignore sentence ending
+        continue; // ignore sentence ending
     }

     // to-do, should ignore <s>, check the sentence ending is </s>

@@ -265,7 +265,7 @@ bool SequenceReader<ElemType>::EnsureDataAvailable(size_t mbStartSample, bool /*
     RuntimeError("cannot find sentence begining label");

     if (m_labelIdData[jEnd] != index)
-        /// for language model, the first word/letter has to be <s>
+        // for language model, the first word/letter has to be <s>
         RuntimeError("SequenceReader: the last letter/word of a batch has to be the sentence ending symbol");
     }
 }
@@ -560,7 +560,7 @@ void SequenceReader<ElemType>::InitFromConfig(const ConfigRecordType& readerConf
     const LabelInfo& labelOut = m_labelInfo[labelInfoOut];
     m_parser.ParseInit(m_file.c_str(), m_featureDim, labelIn.dim, labelOut.dim, labelIn.beginSequence, labelIn.endSequence, labelOut.beginSequence, labelOut.endSequence);

-    /// read unk sybol
+    // read unk sybol
     mUnk = readerConfig(L"unk", "<unk>");
 }

@@ -662,7 +662,7 @@ void SequenceReader<ElemType>::ReadClassInfo(const wstring& vocfile, int& class_
         counts[p.first] = (double) p.second;
     m_noiseSampler = noiseSampler<long>(counts);

-    /// check if unk is the same used in vocabulary file
+    // check if unk is the same used in vocabulary file
     if (word4idx.find(mUnk.c_str()) == word4idx.end())
     {
         LogicError("SequenceReader::ReadClassInfo unk symbol %s is not in vocabulary file", mUnk.c_str());
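`noiseSampler<long>` is built here from unigram counts, for noise/negative sampling during training. A hypothetical stand-in (not the library's class) showing the intended behavior with the standard library:

    #include <map>
    #include <random>
    #include <vector>

    // draws noise word ids with probability proportional to corpus counts,
    // as assembled into `counts` above
    class UnigramNoiseSampler
    {
        std::vector<long> ids;
        std::discrete_distribution<size_t> dist;
        std::mt19937 rng{12345};
    public:
        explicit UnigramNoiseSampler(const std::map<long, double>& counts)
        {
            std::vector<double> w;
            for (const auto& p : counts)
            {
                ids.push_back(p.first);
                w.push_back(p.second);
            }
            dist = std::discrete_distribution<size_t>(w.begin(), w.end());
        }
        long Sample() { return ids[dist(rng)]; }
    };
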
@@ -916,9 +916,9 @@ void SequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, s
     else if (m_labelInfo[labelInfoOut].type != labelNone)
         m_labelData.reserve(epochSize);
     m_sequence.reserve(m_seqIndex); // clear out the sequence array
-    /// this is too complicated for LM
+    // this is too complicated for LM
     // SetupEpoch();
-    /// use the LMSetupEpoch() instead
+    // use the LMSetupEpoch() instead
     LMSetupEpoch();

     m_clsinfoRead = false;

@@ -1034,9 +1034,9 @@ void SequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring, Matrix<Elem
     if (class_size > 0)
     {
         labels->SetValue(1, j, (ElemType) clsidx);
-        /// save the [begining ending_indx) of the class
-        labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); /// begining index of the class
-        labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); /// end index of the class
+        // save the [begining ending_indx) of the class
+        labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); // begining index of the class
+        labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); // end index of the class
     }
 }
 }
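Rows 2 and 3 carry the class's word range because this reader feeds a class-factored output layer. In the standard class-based LM factorization (background, not stated in the diff):

    P(w \mid h) \;=\; P\bigl(c(w) \mid h\bigr) \cdot P\bigl(w \mid c(w), h\bigr)

the [begin, end) column indices let the inner softmax run only over the words of class c(w) instead of the whole vocabulary.
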
@@ -1130,7 +1130,7 @@ void SequenceReader<ElemType>::GetClassInfo()
     }
     else if (prvcls > clsidx)
     {
-        /// nwords is larger than the actual number of words
+        // nwords is larger than the actual number of words
         LogicError("LMSequenceReader::GetClassInfo probably the number of words specified is larger than the actual number of words. Check network builder and data reader. ");
     }
 }

@@ -1414,7 +1414,7 @@ void BatchSequenceReader<ElemType>::InitFromConfig(const ConfigRecordType& reade
     else
         LogicError("unsupported format %ls", mode.c_str());

-    /// read unk sybol
+    // read unk sybol
     this->mUnk = msra::strfun::utf8(readerConfig(L"unk", L"<unk>"));

     class_size = 0;
@@ -1660,9 +1660,9 @@ void BatchSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epo
     else if (m_labelInfo[labelInfoOut].type != labelNone)
         m_labelData.reserve(epochSize);
     m_sequence.reserve(m_seqIndex); // clear out the sequence array
-    /// this is too complicated for LM
+    // this is too complicated for LM
     // SetupEpoch();
-    /// use the LMSetupEpoch() instead
+    // use the LMSetupEpoch() instead
     LMSetupEpoch();

     m_clsinfoRead = false;

@@ -1772,7 +1772,7 @@ bool BatchSequenceReader<ElemType>::EnsureDataAvailable(size_t /*mbStartSample*/
         sLn = FindNextSentences(mNumRead);
     }

-    /// add one minibatch
+    // add one minibatch
     firstPosInSentence = mLastPosInSentence;
     size_t i = mLastPosInSentence;
     size_t j = 0;
@@ -2105,7 +2105,7 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,

     labels->SetValue(1, j, (ElemType) clsidx);

-    /// save the [begining ending_indx) of the class
+    // save the [begining ending_indx) of the class
     size_t lft = (size_t) (*m_classInfoLocal)(0, clsidx);
     size_t rgt = (size_t) (*m_classInfoLocal)(1, clsidx);
     if (wrd < lft || lft > rgt || wrd >= rgt)

@@ -2113,8 +2113,8 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
     LogicError("LMSequenceReader::GetLabelOutput word %d should be at least equal to or larger than its class's left index %d; right index %d of its class should be larger or equal to left index %d of its class; word index %d should be smaller than its class's right index %d.\n",
                (int) wrd, (int) lft, (int) rgt, (int) lft, (int) wrd, (int) rgt);
     }
-    labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); /// begining index of the class
-    labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); /// end index of the class
+    labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); // begining index of the class
+    labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); // end index of the class
     }
 }
 else if (readerMode == ReaderMode::Softmax)
@@ -141,7 +141,7 @@ public:
     ReaderMode readerMode;
     int eos_idx, unk_idx;

-    string mUnk; /// unk symbol
+    string mUnk; // unk symbol

 public:
     // typedef std::string LabelType;

@@ -158,7 +158,7 @@ protected:
     size_t m_totalSamples; // number of samples in the dataset
     size_t m_featureDim; // feature dimensions for extra features
     size_t m_featureCount; // total number of non-zero features (in labelsDim + extra features dim)
-    /// for language modeling, the m_featureCount = 1, since there is only one nonzero element
+    // for language modeling, the m_featureCount = 1, since there is only one nonzero element
     size_t m_readNextSampleLine; // next sample to read Line
     size_t m_readNextSample; // next sample to read
     size_t m_seqIndex; // index into the m_sequence array
@@ -413,7 +413,7 @@ public:
     }
     void Reset();

-    /// return length of sentences size
+    // return length of sentences size
     size_t FindNextSentences(size_t numSentences);
     bool DataEnd(EndDataType endDataType);
     void SetSentenceEnd(int wrd, int pos, int actualMbSize);
@@ -48,7 +48,7 @@ void LMSequenceWriter<ElemType>::InitFromConfig(const ConfigRecordType& writerCo
     int iN = thisOutput(L"nbest", 1);
     nBests[outputNames[i]] = iN;
     wstring fname = thisOutput(L"token");
-    /// read unk sybol
+    // read unk sybol
     mUnk[outputNames[i]] = writerConfig(L"unk", "<unk>");

     SequenceReader<ElemType>::ReadClassInfo(fname, class_size,

@@ -28,7 +28,7 @@ private:
     map<wstring, map<int, size_t>> idx4cnt;
     int nwords;

-    map<wstring, string> mUnk; /// unk symbol
+    map<wstring, string> mUnk; // unk symbol

     int noise_sample_size;
     noiseSampler<long> m_noiseSampler;
@@ -57,7 +57,7 @@ long BatchLUSequenceParser<NumType, LabelType>::Parse(size_t recordsRequested, s
     long orgRecordCount = (long) labels->size();
     long lineCount = 0;
     long tokenCount = 0;
-    bool bAtEOS = false; /// whether the reader is at the end of sentence position
+    bool bAtEOS = false; // whether the reader is at the end of sentence position
     SequencePosition sequencePositionLast(0, 0, 0);

     wstring ch;

@@ -70,7 +70,7 @@ long BatchLUSequenceParser<NumType, LabelType>::Parse(size_t recordsRequested, s
     {
         if (canMultiplePassData)
         {
-            ParseReset(); /// restart from the corpus begining
+            ParseReset(); // restart from the corpus begining
             continue;
         }
         else

@@ -118,7 +118,7 @@ long BatchLUSequenceParser<NumType, LabelType>::Parse(size_t recordsRequested, s
     labels->push_back(outputlabel2id.find(vstr[vstr.size() - 1])->second);
     input->push_back(vtmp);
     if ((vstr[vstr.size() - 1] == m_endSequenceOut ||
-         /// below is for backward support
+         // below is for backward support
          vstr[0] == m_endTag) &&
         input->size() > 0 && labels->size() > 0)
     {
@@ -511,7 +511,7 @@ void BatchLUSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t e

     Reset();

-    m_parser.ParseReset(); /// restart from the corpus beginning
+    m_parser.ParseReset(); // restart from the corpus beginning
 }

 template <class ElemType>

@@ -585,7 +585,7 @@ size_t BatchLUSequenceReader<ElemType>::FindNextSentences(size_t numRead)
     mToProcess.push_back(seq);
     mMaxSentenceLength = max((int) mMaxSentenceLength, ln);
     if (previousLn == -1)
-        mLastProcessedSentenceId = seq + 1; /// update index for the next retrieval
+        mLastProcessedSentenceId = seq + 1; // update index for the next retrieval
     previousLn = ln;
     }
 }
@@ -952,8 +952,8 @@ size_t BatchLUSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
     ElemType rgt = (*labelInfo.m_classInfoLocal)(1, clsidx);
     if (rgt <= lft)
         LogicError("LUSequenceReader : right is equal or smaller than the left, which is wrong.");
-    labels->SetValue(2, j, lft); /// beginning index of the class
-    labels->SetValue(3, j, rgt); /// end index of the class
+    labels->SetValue(2, j, lft); // beginning index of the class
+    labels->SetValue(3, j, rgt); // end index of the class
     }
     else
         LogicError("LUSequenceReader: reader mode is not set to Plain. Or in the case of setting it to Class, the class number is 0. ");

@@ -1094,7 +1094,7 @@ bool BatchLUSequenceReader<ElemType>::GetFrame(std::map<std::wstring, Matrix<Ele
     {
         int cxt = m_wordContext[jj];

-        /// assert that wordContext is organized as descending order
+        // assert that wordContext is organized as descending order
         assert((jj == m_wordContext.size() - 1) ? true : cxt > m_wordContext[jj + 1]);

         size_t hidx;
@@ -1134,7 +1134,7 @@ void BatchLUSequenceReader<ElemType>::InitProposals(map<wstring, Matrix<ElemType
     {
         if (m_labelInfo[labelInfoIn].isproposal)
         {
-            /// no need to save info for labelInfoIn since it is in mProposals
+            // no need to save info for labelInfoIn since it is in mProposals
             if (pMat.find(m_labelsName[labelInfoOut]) != pMat.end())
                 mMatrices[m_labelsName[labelInfoOut]].SetValue(*(pMat[m_labelsName[labelInfoOut]]));
         }

@@ -1184,7 +1184,7 @@ template class BatchLUSequenceReader<float>;
 template <class ElemType>
 bool MultiIOBatchLUSequenceReader<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices)
 {
-    /// on first iteration, need to check if all requested data matrices are available
+    // on first iteration, need to check if all requested data matrices are available
     std::map<std::wstring, size_t>::iterator iter;
     if (mCheckDictionaryKeys)
     {
@@ -1207,14 +1207,14 @@ bool MultiIOBatchLUSequenceReader<ElemType>::GetMinibatch(std::map<std::wstring,
         mCheckDictionaryKeys = false;
     }

-    /// set the same random seed
+    // set the same random seed
     for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
     {
         p->second->SetRandomSeed(this->m_seed);
     }
     this->m_seed++;

-    /// run for each reader
+    // run for each reader
     for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
     {
         if ((p->second)->GetMinibatch(matrices) == false)
|
||||||
vector<wstring> ioNames = readerConfig(L"ioNodeNames", ConfigRecordType::Array(stringargvector()));
|
vector<wstring> ioNames = readerConfig(L"ioNodeNames", ConfigRecordType::Array(stringargvector()));
|
||||||
if (ioNames.size() > 0)
|
if (ioNames.size() > 0)
|
||||||
{
|
{
|
||||||
/// newer code that explicitly place multiple streams for inputs
|
// newer code that explicitly place multiple streams for inputs
|
||||||
foreach_index (i, ioNames) // inputNames should map to node names
|
foreach_index (i, ioNames) // inputNames should map to node names
|
||||||
{
|
{
|
||||||
const ConfigRecordType& thisIO = readerConfig(ioNames[i]);
|
const ConfigRecordType& thisIO = readerConfig(ioNames[i]);
|
||||||
|
@ -1257,7 +1257,7 @@ void MultiIOBatchLUSequenceReader<ElemType>::InitFromConfig(const ConfigRecordTy
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// older code that assumes only one stream of feature
|
// older code that assumes only one stream of feature
|
||||||
BatchLUSequenceReader<ElemType>* thisReader = new BatchLUSequenceReader<ElemType>();
|
BatchLUSequenceReader<ElemType>* thisReader = new BatchLUSequenceReader<ElemType>();
|
||||||
|
|
||||||
thisReader->Init(readerConfig);
|
thisReader->Init(readerConfig);
|
||||||
|
@ -1271,7 +1271,7 @@ void MultiIOBatchLUSequenceReader<ElemType>::InitFromConfig(const ConfigRecordTy
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
void MultiIOBatchLUSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples)
|
void MultiIOBatchLUSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples)
|
||||||
{
|
{
|
||||||
/// run for each reader
|
// run for each reader
|
||||||
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
|
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
|
||||||
{
|
{
|
||||||
(p->second)->StartMinibatchLoop(mbSize, epoch, requestedEpochSamples);
|
(p->second)->StartMinibatchLoop(mbSize, epoch, requestedEpochSamples);
|
||||||
|
@ -1281,7 +1281,7 @@ void MultiIOBatchLUSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, s
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
void MultiIOBatchLUSequenceReader<ElemType>::CopyMBLayoutTo(MBLayoutPtr pMBLayout)
|
void MultiIOBatchLUSequenceReader<ElemType>::CopyMBLayoutTo(MBLayoutPtr pMBLayout)
|
||||||
{
|
{
|
||||||
/// run for each reader
|
// run for each reader
|
||||||
vector<size_t> col;
|
vector<size_t> col;
|
||||||
size_t rows = 0, cols = 0;
|
size_t rows = 0, cols = 0;
|
||||||
for (const auto& p : mReader)
|
for (const auto& p : mReader)
|
||||||
|
@ -1332,7 +1332,7 @@ bool MultiIOBatchLUSequenceReader<ElemType>::DataEnd(EndDataType endDataType)
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
bool MultiIOBatchLUSequenceReader<ElemType>::GetProposalObs(std::map<std::wstring, Matrix<ElemType>*>& matrices, const size_t tidx, vector<size_t>& history)
|
bool MultiIOBatchLUSequenceReader<ElemType>::GetProposalObs(std::map<std::wstring, Matrix<ElemType>*>& matrices, const size_t tidx, vector<size_t>& history)
|
||||||
{
|
{
|
||||||
/// run for each reader
|
// run for each reader
|
||||||
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
|
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
|
||||||
{
|
{
|
||||||
if ((p->second)->GetFrame(matrices, tidx, history) == false)
|
if ((p->second)->GetFrame(matrices, tidx, history) == false)
|
||||||
|
@@ -1348,7 +1348,7 @@ bool MultiIOBatchLUSequenceReader<ElemType>::GetProposalObs(std::map<std::wstrin
 template <class ElemType>
 void MultiIOBatchLUSequenceReader<ElemType>::InitProposals(std::map<std::wstring, Matrix<ElemType>*>& matrices)
 {
-    /// run for each reader
+    // run for each reader
     for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
     {
         (p->second)->InitProposals(matrices);
@@ -64,13 +64,13 @@ public:
     bool mRandomize;

 public:
-    /// deal with OOV
+    // deal with OOV
     map<LabelType, LabelType> mWordMapping;
     wstring mWordMappingFn;
     LabelType mUnkStr;

 public:
-    /// accumulated number of sentneces read so far
+    // accumulated number of sentneces read so far
     unsigned long mTotalSentenceSofar;

 protected:

@@ -83,7 +83,7 @@ protected:
     size_t m_totalSamples; // number of samples in the dataset
     size_t m_featureDim; // feature dimensions for extra features
     size_t m_featureCount; // total number of non-zero features (in labelsDim + extra features dim)
-    /// for language modeling, the m_featureCount = 1, since there is only one nonzero element
+    // for language modeling, the m_featureCount = 1, since there is only one nonzero element
     size_t m_readNextSampleLine; // next sample to read Line
     size_t m_readNextSample; // next sample to read
     size_t m_seqIndex; // index into the m_sequence array
@@ -130,11 +130,11 @@ protected:
     long dim; // maximum label ID we will ever see (used for array dimensions)
     LabelType beginSequence; // starting sequence string (i.e. <s>)
     LabelType endSequence; // ending sequence string (i.e. </s>)
-    bool busewordmap; /// whether using wordmap to map unseen words to unk
+    bool busewordmap; // whether using wordmap to map unseen words to unk
     std::wstring mapName;
     std::wstring fileToWrite; // set to the path if we need to write out the label file

-    bool isproposal; /// whether this is for proposal generation
+    bool isproposal; // whether this is for proposal generation

     ReaderMode readerMode;
     /**
@@ -298,7 +298,7 @@ public:
     }
     void Reset();

-    /// return length of sentences size
+    // return length of sentences size
     size_t FindNextSentences(size_t numSentences);
     bool DataEnd(EndDataType endDataType);
     void SetSentenceEnd(int wrd, int pos, int actualMbSize);

@@ -345,7 +345,7 @@ public:

     template <class ConfigRecordType>
     void LoadWordMapping(const ConfigRecordType& config);
-    bool CanReadFor(wstring nodeName); /// return true if this reader can output for a node with name nodeName
+    bool CanReadFor(wstring nodeName); // return true if this reader can output for a node with name nodeName

     vector<size_t> ReturnToProcessId()
     {
@@ -365,12 +365,12 @@ public:
     /**
     for sequential reading data, useful for beam search decoding
     */
-    /// this is for frame-by-frame reading of data.
-    /// data is first read into these matrices and then if needed is column-by-column retrieved
+    // this is for frame-by-frame reading of data.
+    // data is first read into these matrices and then if needed is column-by-column retrieved
     map<wstring, Matrix<ElemType>> mMatrices;
     bool GetFrame(std::map<std::wstring, Matrix<ElemType>*>& matrices, const size_t tidx, vector<size_t>& history);

-    /// create proposals
+    // create proposals
     void InitProposals(map<wstring, Matrix<ElemType>*>& pMat);

 public:
@@ -443,7 +443,7 @@ public:
     int GetSentenceEndIdFromOutputLabel();
     bool DataEnd(EndDataType endDataType);

-    /// create proposals
+    // create proposals
     void InitProposals(map<wstring, Matrix<ElemType>*>& pMat);
     bool GetProposalObs(std::map<std::wstring, Matrix<ElemType>*>& matrices, const size_t tidx, vector<size_t>& history);
 };
@@ -657,7 +657,7 @@ void UCIFastReader<ElemType>::StartDistributedMinibatchLoop(size_t mbSize, size_
     m_subsetNum = subsetNum;
     m_numSubsets = numSubsets;
     if (mOneLinePerFile)
-        mbSize = mRequestedNumParallelSequences; /// each file has only one observation, therefore the number of data to read is the number of files
+        mbSize = mRequestedNumParallelSequences; // each file has only one observation, therefore the number of data to read is the number of files

     // if we aren't currently caching, see if we can use a cache
     if (!m_cachingReader && !m_cachingWriter)
@@ -2350,7 +2350,7 @@ bool SGD<ElemType>::GradientCheck(ComputationNetworkPtr net,

     for (size_t itry = 0; itry < min((size_t) 50, node->Value().GetNumElements()); itry++)
     {
-        /// no support to sparse matrix yet
+        // no support to sparse matrix yet
         int irow = (int) fmod(rand(), node->Gradient().GetNumRows() - 1);
         int icol = (int) fmod(rand(), node->Gradient().GetNumCols() - 1);
         irow = max(0, irow);
|
||||||
m_L2RegWeight = configSGD(L"L2RegWeight", 0.0);
|
m_L2RegWeight = configSGD(L"L2RegWeight", 0.0);
|
||||||
m_L1RegWeight = configSGD(L"L1RegWeight", 0.0);
|
m_L1RegWeight = configSGD(L"L1RegWeight", 0.0);
|
||||||
|
|
||||||
/// for backward support. future setup should use gradUpdateType=AdaGrad, instead of
|
// for backward support. future setup should use gradUpdateType=AdaGrad, instead of
|
||||||
/// useAdagrad=true
|
// useAdagrad=true
|
||||||
bool useAdagrad = configSGD(L"useAdagrad", false);
|
bool useAdagrad = configSGD(L"useAdagrad", false);
|
||||||
if (useAdagrad)
|
if (useAdagrad)
|
||||||
{
|
{
|
||||||
|
@ -2596,7 +2596,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
|
||||||
m_adaptationRegType = ParseAdaptationRegType(configSGD(L"adaptationRegType", L"None"));
|
m_adaptationRegType = ParseAdaptationRegType(configSGD(L"adaptationRegType", L"None"));
|
||||||
m_adaptationRegWeight = configSGD(L"adaptationRegWeight", 0.0);
|
m_adaptationRegWeight = configSGD(L"adaptationRegWeight", 0.0);
|
||||||
|
|
||||||
/// gradient check setup
|
// gradient check setup
|
||||||
m_doGradientCheck = configSGD(L"gradientcheck", false);
|
m_doGradientCheck = configSGD(L"gradientcheck", false);
|
||||||
m_gradientCheckSigDigit = configSGD(L"sigFigs", 6.0); // TODO: why is this a double?
|
m_gradientCheckSigDigit = configSGD(L"sigFigs", 6.0); // TODO: why is this a double?
|
||||||
|
|
||||||
|
|
|
@@ -101,8 +101,8 @@ public:

     totalEpochSamples += actualMBSize;

-    /// call DataEnd function in dataReader to do
-    /// reader specific process if sentence ending is reached
+    // call DataEnd function in dataReader to do
+    // reader specific process if sentence ending is reached
     dataReader.DataEnd(endDataSentence);
 }

@@ -26,11 +26,11 @@ void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,

     assert(nStream == sentenceBegin.GetNumRows());

-    /// only set state to init state value for segmentation = 0, and -1
-    /// e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0
+    // only set state to init state value for segmentation = 0, and -1
+    // e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0

     Matrix<ElemType> colPos(sentenceBegin.GetDeviceId());
-    colPos.SetValue(sentenceBegin); /// -1 0 1
+    colPos.SetValue(sentenceBegin); // -1 0 1
     colPos.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
     Matrix<ElemType>::Scale((ElemType) -1.0, colPos);
     colPos += 0; // (int)MinibatchPackingFlags::None; // TODO: these flags no longer exist, this test probably no longer applies
@@ -38,8 +38,8 @@ void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,
     Matrix<ElemType> ones(sentenceBegin.GetDeviceId());
     ones.Resize(nStateRow, nStream);
     ones.SetValue((ElemType) 1);
-    /// add default state value if it is for reset
-    Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); /// += [0 initStateValue 0 ]
+    // add default state value if it is for reset
+    Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); // += [0 initStateValue 0 ]
 }

 template <class ElemType>
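Reading the call above against the usual GEMM-style argument order (alpha, A, transA, B, transB, beta, C), it computes

    newprevstate = initStateValue * ones * colSeg + 1.0 * newprevstate

that is, the scalar initial-state value is broadcast, via the ones matrix times the 0/1 column-selection mask, into exactly the columns being reset; this is what the trailing `+= [0 initStateValue 0 ]` comment records.
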
@@ -107,7 +107,7 @@ void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool
     int d = iPastIndex;
     if (d < 0)
         d = (int) functionValues.Mod((float) iPastIndex, (float) pastActivity.GetNumCols());
-    /// this can point to the past activity of the previous mninibatch
+    // this can point to the past activity of the previous mninibatch

     Matrix<ElemType> out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + indexInBatch, 1);
     Matrix<ElemType> inp((DEVICEID_TYPE) functionValues.GetDeviceId());
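The Mod call above folds a negative past index back into the circular buffer of past activations. A hypothetical scalar restatement:

    // maps e.g. iPastIndex = -1 with numCols = 5 to d = 4, so the column slice
    // taken afterwards comes from the tail of the previous minibatch's buffer
    int WrapPastIndex(int iPastIndex, int numCols)
    {
        int d = iPastIndex;
        if (d < 0)
            d = ((iPastIndex % numCols) + numCols) % numCols;
        return d;
    }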