normalized comment separator from /// to //
This commit is contained in:
Parent: 03a4fcb295
Commit: 27641d86d0

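For context: in C++, /// opens a Doxygen-style documentation comment while // is a plain line comment; the two are identical to the compiler, so this commit changes no behavior -- it only keeps incidental remarks out of generated API documentation. A minimal before/after sketch of the pattern applied throughout, using a declaration that appears in the diff below:

    int m_nbrCls; /// number of classes   <- before: picked up as documentation
    int m_nbrCls; // number of classes    <- after: plain comment
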
@@ -271,7 +271,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
 }
 std::unordered_map<string, double> v_count;

-/// get line
+// get line
 string str;
 vector<string> vstr;
 long long prevClsIdx = -1;

@@ -422,7 +422,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
 long long clsIdx = nbrCls > 0 ? m_class[i] : 0;
 if (nbrCls > 0 && clsIdx != prevClsIdx)
 {
-cls2idx(clsIdx, 0) = (ElemType) i; /// the left boundary of clsIdx
+cls2idx(clsIdx, 0) = (ElemType) i; // the left boundary of clsIdx
 prevClsIdx = m_class[i];
 }
 ofvocab << " " << i << "\t " << m_count[i] << "\t" << m_words[i] << "\t" << clsIdx << std::endl;

@@ -431,7 +431,7 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config)
 ofvocab.close();
 if (nbrCls > 0)
 {
-/// write the outputs
+// write the outputs
 msra::files::make_intermediate_dirs(s2ws(outputWord2Cls));
 ofstream ofp(outputWord2Cls.c_str());
 if (!ofp)

@@ -204,7 +204,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN()
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

 pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1);
-/// unless there is a good algorithm to detect loops, use this explicit setup
+// unless there is a good algorithm to detect loops, use this explicit setup
 output = ApplyNonlinearFunction(
 builder.Plus(
 builder.Times(u, input), builder.Times(w, pastValue)),

@@ -235,7 +235,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN()
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

 pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i + 1], 1);
-/// unless there is a good algorithm to detect loops, use this explicit setup
+// unless there is a good algorithm to detect loops, use this explicit setup
 output = ApplyNonlinearFunction(
 builder.Plus(
 builder.Times(u, input), builder.Times(w, pastValue)),

@@ -316,7 +316,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

 pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1);
-/// unless there is a good algorithm to detect loops, use this explicit setup
+// unless there is a good algorithm to detect loops, use this explicit setup
 output = ApplyNonlinearFunction(
 builder.Plus(
 builder.Times(u, input), builder.Times(w, pastValue)),

@@ -346,7 +346,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

 pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i + 1], 1);
-/// unless there is a good algorithm to detect loops, use this explicit setup
+// unless there is a good algorithm to detect loops, use this explicit setup
 output = ApplyNonlinearFunction(
 builder.Plus(
 builder.Times(u, input), builder.Times(w, pastValue)),

@@ -366,13 +366,13 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork()
 }
 }

-/// need to have [input_dim x output_dim] matrix
-/// e.g., [200 x 10000], where 10000 is the vocabulary size
-/// this is for speed-up issue as per word matrix can be simply obtained using column slice
+// need to have [input_dim x output_dim] matrix
+// e.g., [200 x 10000], where 10000 is the vocabulary size
+// this is for speed-up issue as per word matrix can be simply obtained using column slice
 w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

-/// the label is a dense matrix. each element is the word index
+// the label is a dense matrix. each element is the word index
 label = builder.CreateInputNode(L"labels", 4);

 clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);

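The "column slice" remark in the hunk above is the point of the class-based speed-up: with the output weight held as an [input_dim x output_dim] matrix (e.g. 200 x 10000, hidden size by vocabulary size), the weights for one class's contiguous word range [left, right) are just a view over columns, with no copy. A minimal sketch of the idea, assuming a plain column-major buffer (CNTK's Matrix::ColumnSlice plays this role in the real code; all names below are hypothetical):

    #include <cstddef>
    #include <vector>

    // Column-major [rows x cols] buffer: column j starts at data + j * rows.
    struct ColMajor
    {
        std::vector<float> data;
        std::size_t rows, cols;
        // A "column slice" is just a pointer plus a width: O(1), no copy.
        const float* col(std::size_t j) const { return data.data() + j * rows; }
    };

    // Score only the words of one class, i.e. columns [left, right) of W,
    // instead of all cols columns of a full softmax.
    void scoreClass(const ColMajor& W, const float* hidden,
                    std::size_t left, std::size_t right, float* out)
    {
        for (std::size_t j = left; j < right; ++j)
        {
            const float* wj = W.col(j); // per-word weight vector via column slice
            float s = 0;
            for (std::size_t r = 0; r < W.rows; ++r)
                s += wj[r] * hidden[r];
            out[j - left] = s;
        }
    }
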
@@ -444,7 +444,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
 {
 // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
 output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-/// previously used function. now uses LSTMNode which is correct and fast
+// previously used function. now uses LSTMNode which is correct and fast
 input = output;
 for (int i = 1 + offset; i < numHiddenLayers; i++)
 {

@@ -458,7 +458,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
 }
 }

-/// serve as a global bias term
+// serve as a global bias term
 gt = builder.CreateInputNode(L"binaryFeature", m_auxFeatDim);
 m_net->FeatureNodes().push_back(gt);
 e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"AuxTrans%d", 0),

@@ -468,13 +468,13 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
 output = builder.Plus(input, u, L"PlusGlobalBias");
 input = output;

-/// need to have [input_dim x output_dim] matrix
-/// e.g., [200 x 10000], where 10000 is the vocabulary size
-/// this is for speed-up issue as per word matrix can be simply obtained using column slice
+// need to have [input_dim x output_dim] matrix
+// e.g., [200 x 10000], where 10000 is the vocabulary size
+// this is for speed-up issue as per word matrix can be simply obtained using column slice
 w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

-/// the label is a dense matrix. each element is the word index
+// the label is a dense matrix. each element is the word index
 label = builder.CreateInputNode(L"labels", 4);

 clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);

@@ -542,7 +542,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFro
 }

 int recur_idx = 0;
-/// unless there is a good algorithm to detect loops, use this explicit setup
+// unless there is a good algorithm to detect loops, use this explicit setup
 int ik = 1;
 output = input;
 while (ik <= m_maOrder)

@@ -675,7 +675,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNeuralProbNetworkFrom
 Wxi = builder.CreateLearnableParameter(L"WXI", m_layerSizes[1], m_layerSizes[0]);
 m_net->InitLearnableParameters(Wxi, m_uniformInit, randomSeed++, m_initValueScale);

-/// unless there is a good algorithm to detect loops, use this explicit setup
+// unless there is a good algorithm to detect loops, use this explicit setup
 it = builder.Plus(
 builder.Tanh(
 builder.Plus(

@@ -994,7 +994,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFrom
 outputFromEachLayer[1] = input;
 }

-/// direct connect from input node to output node
+// direct connect from input node to output node

 int recur_idx = 0;
 int offset = m_lookupTableOrder > 0 ? 1 : 0;

@@ -1097,7 +1097,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromD
 {
 // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
 output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-/// previously used function. now uses LSTMNode which is correct and fast
+// previously used function. now uses LSTMNode which is correct and fast
 input = output;
 for (int i = 1 + offset; i < numHiddenLayers; i++)
 {

@@ -1111,13 +1111,13 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromD
 }
 }

-/// need to have [input_dim x output_dim] matrix
-/// e.g., [200 x 10000], where 10000 is the vocabulary size
-/// this is for speed-up issue as per word matrix can be simply obtained using column slice
+// need to have [input_dim x output_dim] matrix
+// e.g., [200 x 10000], where 10000 is the vocabulary size
+// this is for speed-up issue as per word matrix can be simply obtained using column slice
 w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

-/// the label is a dense matrix. each element is the word index
+// the label is a dense matrix. each element is the word index
 label = builder.CreateInputNode(L"labels", 4);

 clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);

@@ -1158,16 +1158,16 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilde
 size_t nDim = inputDim + outputDim + 2;
 wInputGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WINPUTGATE%d", iLayer), outputDim, nDim);
 m_net->InitLearnableParameters(wInputGate, m_uniformInit, randomSeed++, m_initValueScale);
-wInputGate->Value().ColumnSlice(0, 1).SetValue(m_inputGateInitVal); /// init to input gate bias
+wInputGate->Value().ColumnSlice(0, 1).SetValue(m_inputGateInitVal); // init to input gate bias
 wForgetGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WFORGETGATE%d", iLayer), outputDim, nDim);
 m_net->InitLearnableParameters(wForgetGate, m_uniformInit, randomSeed++, m_initValueScale);
-wForgetGate->Value().ColumnSlice(0, 1).SetValue(m_forgetGateInitVal); /// init to forget gate bias
+wForgetGate->Value().ColumnSlice(0, 1).SetValue(m_forgetGateInitVal); // init to forget gate bias
 wOutputGate = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WOUTPUTGATE%d", iLayer), outputDim, nDim);
 m_net->InitLearnableParameters(wOutputGate, m_uniformInit, randomSeed++, m_initValueScale);
-wOutputGate->Value().ColumnSlice(0, 1).SetValue(m_outputGateInitVal); /// init to output gate bias
+wOutputGate->Value().ColumnSlice(0, 1).SetValue(m_outputGateInitVal); // init to output gate bias
 wMemoryCellMatrix = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"WMEMORYCELLWEIGHT%d", iLayer), outputDim, inputDim + outputDim + 1);
 m_net->InitLearnableParameters(wMemoryCellMatrix, m_uniformInit, randomSeed++, m_initValueScale);
-wMemoryCellMatrix->Value().ColumnSlice(0, 1).SetValue(0); /// init to memory cell bias
+wMemoryCellMatrix->Value().ColumnSlice(0, 1).SetValue(0); // init to memory cell bias

 output = builder.LSTM(inputObs, wInputGate, wForgetGate, wOutputGate, wMemoryCellMatrix, msra::strfun::wstrprintf(L"LSTM%d", iLayer));

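In the hunk above each gate packs its weights and bias into a single [outputDim x nDim] matrix (nDim = inputDim + outputDim + 2), with the bias living in column 0; that is why the bias is re-set by writing one column slice right after the random initialization. A minimal sketch of that convention, assuming a column-major layout and a uniform random init in the spirit of InitLearnableParameters (the init range is an assumption):

    #include <cstddef>
    #include <random>
    #include <vector>

    // One LSTM gate: weights and bias packed in a column-major [outputDim x nDim]
    // matrix; by the convention above, column 0 holds the bias.
    std::vector<float> makeGate(std::size_t outputDim, std::size_t nDim,
                                float biasInit, unsigned seed)
    {
        std::vector<float> W(outputDim * nDim);
        std::mt19937 rng(seed);
        std::uniform_real_distribution<float> u(-0.05f, 0.05f); // assumed range
        for (auto& v : W)
            v = u(rng);      // random init of everything, bias included
        for (std::size_t r = 0; r < outputDim; ++r)
            W[r] = biasInit; // column 0 = bias, like ColumnSlice(0, 1).SetValue(...)
        return W;
    }
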
@@ -1241,7 +1241,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
 outputFromEachLayer[1] = input;
 }

-/// direct connect from input node to output node
+// direct connect from input node to output node

 int recur_idx = 0;
 int offset = m_lookupTableOrder > 0 ? 1 : 0;

@@ -1250,7 +1250,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri

 //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
 output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-/// previously used function. now uses LSTMNode which is correct and fast
+// previously used function. now uses LSTMNode which is correct and fast
 input = output;
 outputFromEachLayer[offset + 1] = input;

@@ -1543,7 +1543,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDes
 outputFromEachLayer[1] = input;
 }

-/// direct connect from input node to output node
+// direct connect from input node to output node

 int recur_idx = 0;
 int offset = m_lookupTableOrder > 0 ? 1 : 0;

@@ -1580,19 +1580,19 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDes

 for (size_t i = offset; i < m_layerSizes.size(); i++)
 {
-/// add direct connect from each layers' output to the layer before the output layer
+// add direct connect from each layers' output to the layer before the output layer
 output = BuildDirectConnect(randomSeed, i, (i > 1) ? m_layerSizes[i] : ((offset == 0) ? m_layerSizes[i] : m_layerSizes[i] * m_lookupTableOrder), m_layerSizes[numHiddenLayers], outputFromEachLayer[i], input);
 if (output != nullptr)
 input = output;
 }

-/// need to have [input_dim x output_dim] matrix
-/// e.g., [200 x 10000], where 10000 is the vocabulary size
-/// this is for speed-up issue as per word matrix can be simply obtained using column slice
+// need to have [input_dim x output_dim] matrix
+// e.g., [200 x 10000], where 10000 is the vocabulary size
+// this is for speed-up issue as per word matrix can be simply obtained using column slice
 w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
 m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);

-/// the label is a dense matrix. each element is the word index
+// the label is a dense matrix. each element is the word index
 label = builder.CreateInputNode(L"labels", 2 * (this->nce_noises + 1));

 bias = builder.CreateLearnableParameter(L"BiasVector", 1, m_layerSizes[m_layerSizes.size() - 1]);

@@ -32,7 +32,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

 enum RNNTYPE
 {
-SIMPLENET = 0, /// no recurrent connections
+SIMPLENET = 0, // no recurrent connections
 SIMPLERNN = 1,
 LSTM = 2,
 DEEPRNN = 4,

@@ -147,9 +147,9 @@ public:

 ConfigArray sSizes = config("streamSizes", "");
 m_streamSizes = sSizes;
-sSizes = config("lookupTableOrderSizes", ""); /// this allows having a multiple streams of inputs with
-/// different lookuptable order sizes. the older one lookupTableOrder is still kept to have backward
-/// support.
+sSizes = config("lookupTableOrderSizes", ""); // this allows having a multiple streams of inputs with
+// different lookuptable order sizes. the older one lookupTableOrder is still kept to have backward
+// support.
 m_lookupTabelOrderSizes = sSizes;

 m_labelEmbeddingSize = config("labelEmbeddingSize", "10");

@@ -346,14 +346,14 @@ protected:
 TrainingCriterion m_trainCriterion;
 EvalCriterion m_evalCriterion;

-intargvector m_directConnect; /// connect those layers directly in a sequence order
-/// for example: 1:2:3 will connect 1 to 2 and then 2 to 3
+intargvector m_directConnect; // connect those layers directly in a sequence order
+// for example: 1:2:3 will connect 1 to 2 and then 2 to 3

-/// recurrent network
+// recurrent network
 intargvector m_recurrentLayers;
 float m_defaultHiddenActivity;
 RNNTYPE m_rnnType;
-int m_maOrder; /// MA model order
+int m_maOrder; // MA model order

 bool m_constForgetGateValue;
 bool m_constInputGateValue;

@@ -363,18 +363,18 @@ protected:
 ElemType m_inputGateInitVal;
 ElemType m_outputGateInitVal;

-intargvector m_streamSizes; /// for multiple stream data
-intargvector m_lookupTabelOrderSizes; /// each stream has its own projection, so need to provide with the lookup table order size for each stream
+intargvector m_streamSizes; // for multiple stream data
+intargvector m_lookupTabelOrderSizes; // each stream has its own projection, so need to provide with the lookup table order size for each stream

 int m_lookupTableOrder;
 int m_labelEmbeddingSize;

-/// these are the file names for word 2 class mapping and class to word index mapping
-/// these are used for class-based language modeling
+// these are the file names for word 2 class mapping and class to word index mapping
+// these are used for class-based language modeling
 string m_cls2index;
 string m_word2class;
-int m_nbrCls; /// number of classes
-int m_vocabSize; /// vocabulary size
+int m_nbrCls; // number of classes
+int m_vocabSize; // vocabulary size
 int nce_noises;

 bool m_sparse_input;

@@ -51,7 +51,7 @@ void DataReader<ElemType>::InitFromConfig(const ConfigRecordType& /*config*/)
 template <class ElemType>
 void DataReader<ElemType>::Destroy()
 {
-/// newer code that explicitly place multiple streams for inputs
+// newer code that explicitly place multiple streams for inputs
 foreach_index (i, m_ioNames) // inputNames should map to node names
 {
 m_dataReaders[m_ioNames[i]]->Destroy();

@@ -935,7 +935,7 @@ protected:
 std::vector<ComputationNodeBasePtr> m_finalCriteria;
 std::vector<ComputationNodeBasePtr> m_evalNodes;
 std::vector<ComputationNodeBasePtr> m_outputNodes;
-std::vector<ComputationNodeBasePtr> m_pairNodes; /// nodes for the children network to pair
+std::vector<ComputationNodeBasePtr> m_pairNodes; // nodes for the children network to pair
 vector<std::vector<ComputationNodeBasePtr>*> GetAllNodeGroups() // get all groups to allow to iterate over all of them ...continue
 {
 return vector<std::vector<ComputationNodeBasePtr>*>{&m_features, &m_labels, &m_finalCriteria, &m_evalNodes, &m_outputNodes, &m_pairNodes};

@@ -1576,7 +1576,7 @@ public:
 {
 }

-/// these two are used to pass gradients from future minibatch
+// these two are used to pass gradients from future minibatch
 virtual void GetErrorsToPreviousMinibatch(Matrix<ElemType>&)
 {
 }

@@ -176,7 +176,7 @@ public:
 static void DecideStartEndingOutputLab(const Matrix<ElemType>& lbls, int& stt, int& stp)
 {
 if (stt != -1 && stp != -1)
-return; /// have computed before
+return; // have computed before

 int iNumPos = lbls.GetNumCols();

@@ -214,7 +214,7 @@ public:
 return false;
 }

-/// compute posterior probability of label y at position t
+// compute posterior probability of label y at position t
 virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
 {
 DecideStartEndingOutputLab(Input(0)->Value(), mStartLab, mEndLab);

@@ -225,27 +225,27 @@ public:
 // compute forward backward algorithm
 void ForwardPropS(Matrix<ElemType>& alpha, Matrix<ElemType>& backtrace, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const size_t stt, const size_t stp)
 {
-/// to-do, each slice is for one sentence
-/// to-do, number of slices correspond to number of frames
-/// this implementation only supports one sentence per minibatch
+// to-do, each slice is for one sentence
+// to-do, number of slices correspond to number of frames
+// this implementation only supports one sentence per minibatch

-/// change to other values so can support multiple sentences in each minibatch
+// change to other values so can support multiple sentences in each minibatch
 ForwardCompute(alpha, backtrace, pos_scores, pair_scores, stt);
 BackwardCompute(functionValues, backtrace, stp);
 };

-/// compute forward backward algorithm
+// compute forward backward algorithm
 static void ForwardCompute(Matrix<ElemType>& alpha,
 Matrix<ElemType>& backtrace,
 const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores,
 const size_t stt)
 {
-/// to-do, shift more than 1 to support muliple sentences per minibatch
+// to-do, shift more than 1 to support muliple sentences per minibatch
 int iNumPos = pos_scores.GetNumCols();
 int iNumLab = pos_scores.GetNumRows();
 size_t iTmp = 0;

-/// need to have
+// need to have
 alpha.Resize(iNumLab, iNumPos);
 backtrace.Resize(iNumLab, iNumPos);

@@ -265,11 +265,11 @@ public:
 iTmp = j;
 }
 }
-fTmp += pos_scores(k, t); /// include position dependent score
+fTmp += pos_scores(k, t); // include position dependent score
 }
 else
 {
-/// with constrain that the first word is labeled as a given symbol
+// with constrain that the first word is labeled as a given symbol
 iTmp = stt;
 fTmp = 0;
 if (t == 1)

@@ -289,7 +289,7 @@ public:
 }
 };

-/// compute backward algorithm
+// compute backward algorithm
 static void BackwardCompute(
 Matrix<ElemType>& decodedpath,
 const Matrix<ElemType>& backtrace, const size_t stp)

@@ -310,8 +310,8 @@ public:
 }
 };

-/// need to feed in pseudo label data, which tells the decoder what is the beginning
-/// and ending output symbol. these symbols will constrain the search space
+// need to feed in pseudo label data, which tells the decoder what is the beginning
+// and ending output symbol. these symbols will constrain the search space
 virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
 {
 Base::Validate(isFinalValidationPass);

@@ -530,7 +530,7 @@ public:

 ForwardProp(FrameRange(m_pMBLayout));

-/// check with expected values
+// check with expected values
 Value().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
 if (!ISCLOSE(Value()(0, 0), 1.0, EPSILON) ||
 !ISCLOSE(Value()(0, 1), 2.0, EPSILON) ||

@@ -550,7 +550,7 @@ public:
 BackpropTo(i, FrameRange(m_pMBLayout));

 // check with expected values
-if (!ISCLOSE(Input(1)->Gradient()(0, 0), 2, EPSILON) /// bi
+if (!ISCLOSE(Input(1)->Gradient()(0, 0), 2, EPSILON) // bi
 || !ISCLOSE(Input(1)->Gradient()(0, 1), 2, EPSILON) // Wxi
 || !ISCLOSE(Input(1)->Gradient()(1, 0), 2, EPSILON) // Whi
 || !ISCLOSE(Input(1)->Gradient()(2, 1), 2, EPSILON) // Wci

@@ -1003,9 +1003,9 @@ protected:
 Matrix<ElemType> m_clsLogSoftmax;
 Matrix<ElemType> m_clsSoftmax;

-/// gradient of cross entropy with respect to the input of softmax
-/// a 1 row by \sum_t m_nbrWordsInEachTime[t] vector
-/// one slice of size m_nbrWordsInEachTime[t] saves the input to softmax for word y_t
+// gradient of cross entropy with respect to the input of softmax
+// a 1 row by \sum_t m_nbrWordsInEachTime[t] vector
+// one slice of size m_nbrWordsInEachTime[t] saves the input to softmax for word y_t
 Matrix<ElemType> m_grdToSoftMaxInput;
 bool m_needRecomputeGradientToSoftmaxInput;

@@ -1061,7 +1061,7 @@ public:
 {
 }

-/// compute posterior probability of label y at position t
+// compute posterior probability of label y at position t
 virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
 {
 FrameRange fr(Input(0)->GetMBLayout());

@@ -1136,13 +1136,13 @@ public:
 // compute forward backward algorithm
 /*TODO: merge with call site*/ void ForwardPropS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
 {
-/// to-do, each slice is for one sentence
-/// to-do, number of slices correspond to number of frames
-/// this implementation only supports one sentence per minibatch
+// to-do, each slice is for one sentence
+// to-do, number of slices correspond to number of frames
+// this implementation only supports one sentence per minibatch

 int nObs = lbls.GetNumCols();

-/// change to other values so can support multiple sentences in each minibatch
+// change to other values so can support multiple sentences in each minibatch
 assert(iStep == 1);
 ForwardCompute(alpha, lbls, pos_scores, pair_scores);
 BackwardCompute(alpha, beta, functionValues, lbls, pos_scores, pair_scores, iStep);

@@ -1170,7 +1170,7 @@ public:
 ElemType fAlpha;
 fAlpha = a.LogAddSumOfElements();

-/// transition score
+// transition score
 ElemType tscore = 0;
 for (int t = 0; t < nObs - 1; t++)
 {

@@ -1190,19 +1190,19 @@ public:
 }
 tscore += pair_scores(j, i);
 }
-tscore += functionValues.Get00Element(); /// correct path score
-tscore -= fAlpha; /// reduced by the scores from all paths
+tscore += functionValues.Get00Element(); // correct path score
+tscore -= fAlpha; // reduced by the scores from all paths
 functionValues.SetValue(tscore);

 functionValues *= (-1);
 }

-/// compute forward backward algorithm
+// compute forward backward algorithm
 static void ForwardCompute(Matrix<ElemType>& alpha,
 const Matrix<ElemType>& lbls,
 const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores)
 {
-/// to-do, shift more than 1 to support muliple sentences per minibatch
+// to-do, shift more than 1 to support muliple sentences per minibatch
 int iNumPos = lbls.GetNumCols();
 int iNumLab = lbls.GetNumRows();

@@ -1214,7 +1214,7 @@ public:
 break;
 }

-/// need to have
+// need to have
 alpha.Resize(iNumLab, iNumPos);

 for (int t = 0; t < iNumPos; t++)

@@ -1229,13 +1229,13 @@ public:
 fAlpha = alpha(j, t - 1);
 fTmp = alpha.LogAdd(fTmp, fAlpha + pair_scores(k, j));
 }
-fTmp += pos_scores(k, t); /// include position dependent score
+fTmp += pos_scores(k, t); // include position dependent score
 alpha(k, t) = fTmp;
 }
 }
 }

-/// compute backward algorithm
+// compute backward algorithm
 static void BackwardCompute(const Matrix<ElemType>& alpha, Matrix<ElemType>& beta,
 Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
 const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const int shift = 1)

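The alpha recursion visible above is a standard CRF forward pass in log space: alpha(k, t) = LogAdd over j of (alpha(j, t - 1) + transition score into k), plus the position-dependent score pos(k, t). A minimal self-contained sketch of the same recursion, assuming flat vectors in place of CNTK's Matrix, a transition convention pair[j][k] = score of moving from label j to label k, and a simplified uniform start (the real code constrains the start label via stt):

    #include <cmath>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Numerically stable log(exp(a) + exp(b)): the role Matrix::LogAdd plays above.
    double LogAdd(double a, double b)
    {
        if (a < b) std::swap(a, b);
        return a + std::log1p(std::exp(b - a));
    }

    // CRF forward pass in log space over iNumLab labels and iNumPos positions.
    std::vector<std::vector<double>> ForwardAlpha(
        const std::vector<std::vector<double>>& pos,  // pos[k][t]
        const std::vector<std::vector<double>>& pair) // pair[j][k]
    {
        const std::size_t iNumLab = pos.size(), iNumPos = pos[0].size();
        std::vector<std::vector<double>> alpha(iNumLab, std::vector<double>(iNumPos));
        for (std::size_t t = 0; t < iNumPos; ++t)
            for (std::size_t k = 0; k < iNumLab; ++k)
            {
                double fTmp = -1e30; // ~log(0)
                if (t == 0)
                    fTmp = 0; // simplified start; the real code pins the start label
                else
                    for (std::size_t j = 0; j < iNumLab; ++j)
                        fTmp = LogAdd(fTmp, alpha[j][t - 1] + pair[j][k]);
                alpha[k][t] = fTmp + pos[k][t]; // include position dependent score
            }
        return alpha;
    }
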
@@ -1263,7 +1263,7 @@ public:
 startLbl, shift);
 }

-/// compute forward backward algorithm
+// compute forward backward algorithm
 static void PostProbCompute(Matrix<ElemType>& postprob, const Matrix<ElemType>& alpha, const Matrix<ElemType>& beta)
 {
 int iNumPos = alpha.GetNumCols();

@@ -5359,7 +5359,7 @@ void CPUMatrix<ElemType>::RCRFTransGrdCompute(const CPUMatrix<ElemType>& lbls,
 _rcrfTransGrdCompute(i, lbls, alpha, beta, pair_scores, grd, tPos);
 }

-/// transition score
+// transition score
 int i = -1;
 if (tPos == 0)
 i = firstLbl;

@@ -5394,7 +5394,7 @@ void CPUMatrix<ElemType>::_rcrfTransGrdCompute(size_t i,
 const CPUMatrix<ElemType>& beta,
 const CPUMatrix<ElemType>& pair_scores,
 CPUMatrix<ElemType>& grd,
-const size_t tPos /// position
+const size_t tPos // position
 )
 {
 int iNumLab = (int) alpha.GetNumRows();

@@ -370,7 +370,7 @@ public:
 static CPUMatrix<ElemType> RandomUniform(const size_t rows, const size_t cols, const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
 static CPUMatrix<ElemType> RandomGaussian(const size_t rows, const size_t cols, const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);

-/// return true if v is an element in matrix c
+// return true if v is an element in matrix c
 static bool HasElement(const CPUMatrix<ElemType>& a, const ElemType v = 0.0);

 public:

@@ -426,7 +426,7 @@ public:
 ElemType LogAddSumOfElements() const;

 public:
-/// for RCRF
+// for RCRF
 static void RCRFBackwardCompute(const CPUMatrix<ElemType>& alpha, CPUMatrix<ElemType>& beta,
 const CPUMatrix<ElemType>& lbls,
 const CPUMatrix<ElemType>& pair_scores);

@@ -446,7 +446,7 @@ public:
 const CPUMatrix<ElemType>& beta,
 const CPUMatrix<ElemType>& pair_scores,
 CPUMatrix<ElemType>& grd,
-const size_t tPos /// position
+const size_t tPos // position
 );

 protected:

@@ -97,7 +97,7 @@ public:

 static bool AreEqual(const CPUSparseMatrix<ElemType>& a, const CPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);

-/// sum(vec(a).*vec(b))
+// sum(vec(a).*vec(b))
 static ElemType InnerProductOfMatrices(const CPUSparseMatrix<ElemType>& /*a*/, const CPUMatrix<ElemType>& /*b*/)
 {
 NOT_IMPLEMENTED;

@@ -419,7 +419,7 @@ public:

 static void AddElementToElement(const GPUMatrix<ElemType>& a, const size_t ai, const size_t aj, GPUMatrix<ElemType>& c, const size_t ci, const size_t cj);

-/// minus one at a specific position
+// minus one at a specific position
 static void MinusOneAt(GPUMatrix<ElemType>& c, const size_t position);

 static void Scale(ElemType alpha, const GPUMatrix<ElemType>& a, GPUMatrix<ElemType>& c);

@@ -478,7 +478,7 @@ public:
 const GPUMatrix<ElemType>& beta,
 const GPUMatrix<ElemType>& pair_scores,
 GPUMatrix<ElemType>& grd,
-const int startLbl, /// the time 0 start symbol in the output layer
+const int startLbl, // the time 0 start symbol in the output layer
 const int shift);

 public:

@@ -1345,7 +1345,7 @@ template <class ElemType>
 __global__ void _hasElement(
 const ElemType* a,
 const CUDA_LONG N,
-ElemType* d_res /// [2x1] vector. The first is the value to be compared and the second is the 0/1 to return
+ElemType* d_res // [2x1] vector. The first is the value to be compared and the second is the 0/1 to return
 )
 {
 CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;

@@ -4594,15 +4594,15 @@ __global__ void _minusOneAt(
 template <class ElemType>
 __global__ void _rcrfBackwardCompute(
 const size_t iNumPos,
-const ElemType* galpha, /// column slice at current time t
-ElemType* gbeta, /// column slices with [row, 2] at current time t for [
+const ElemType* galpha, // column slice at current time t
+ElemType* gbeta, // column slices with [row, 2] at current time t for [
 const ElemType* gpair_scores,
 const size_t iNumLab, const int shift)
 {
 int id = blockDim.x * blockIdx.x + threadIdx.x;

-extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
-/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
+extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
+// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)

 ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
 ElemType* pair_scores = alpha + iNumPos * iNumLab;

@@ -4611,7 +4611,7 @@ __global__ void _rcrfBackwardCompute(
 if (id < 0 || id >= iNumLab)
 return;

-/// copy global memory to shared memory to save time
+// copy global memory to shared memory to save time
 for (int t = iNumPos - 1; t >= 0; t--)
 {
 alpha[IDX2C(id, t, iNumLab)] = galpha[IDX2C(id, t, iNumLab)];

@@ -4654,7 +4654,7 @@ __global__ void _rcrfBackwardCompute(
 __syncthreads();
 }

-/// copy from shared memory to global memory to pass values
+// copy from shared memory to global memory to pass values
 for (int t = iNumPos - 1; t >= 0; t--)
 {
 gbeta[IDX2C(id, t, iNumLab)] = beta[IDX2C(id, t, iNumLab)];

@@ -4666,18 +4666,18 @@ __global__ void _rcrfBackwardCompute(
 /// assume a column slice of input and output
 template <class ElemType>
 __global__ void _rcrfBackwardCompute(
-const size_t t, /// time position
+const size_t t, // time position
 const size_t iNumPos,
-const ElemType* galpha, /// column slice at current time t
-ElemType* gbeta, /// column slices with [row, 2] at current time t for [
-const ElemType* gzeta, /// column slices with [row, 2] at current time t for [
-const ElemType* gpair_scores, /// column slice at current time t
+const ElemType* galpha, // column slice at current time t
+ElemType* gbeta, // column slices with [row, 2] at current time t for [
+const ElemType* gzeta, // column slices with [row, 2] at current time t for [
+const ElemType* gpair_scores, // column slice at current time t
 const size_t iNumLab, const int shift)
 {
 int id = blockDim.x * blockIdx.x + threadIdx.x;

-extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
-/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
+extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
+// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)

 ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
 ElemType* beta_t1 = (ElemType*) (alpha + iNumLab);

@@ -4687,7 +4687,7 @@ __global__ void _rcrfBackwardCompute(
 if (id < 0 || id >= iNumLab)
 return;

-/// copy global memory to shared memory to save time
+// copy global memory to shared memory to save time
 alpha[id] = galpha[IDX2C(id, t, iNumLab)];
 if (t < iNumPos - 1)
 beta_t1[id] = gbeta[IDX2C(id, t + 1, iNumLab)];

@@ -4717,17 +4717,17 @@ __global__ void _rcrfBackwardCompute(
 /// $\zeta_t(j) = {\sum_k exp(\delta_{t-1}(k) + a_{kj}(t))}$.
 template <class ElemType>
 __global__ void _rcrfBackwardComputeZeta(
-const size_t t, /// time position
+const size_t t, // time position
 const size_t iNumPos,
-const ElemType* galpha, /// column slice at current time t
-ElemType* gzeta, /// column slices with [row, 2] at current time t for [
+const ElemType* galpha, // column slice at current time t
+ElemType* gzeta, // column slices with [row, 2] at current time t for [
 const ElemType* gpair_scores,
 const size_t iNumLab, const int shift)
 {
 int id = blockDim.x * blockIdx.x + threadIdx.x;

-extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
-/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
+extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
+// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)

 ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
 ElemType pair_scores[1024];

@@ -4735,7 +4735,7 @@ __global__ void _rcrfBackwardComputeZeta(
 if (id < 0 || id >= iNumLab)
 return;

-/// copy global memory to shared memory to save time
+// copy global memory to shared memory to save time
 alpha[id] = galpha[IDX2C(id, t, iNumLab)];

 __syncthreads();

@@ -4758,10 +4758,10 @@ __global__ void _rcrfBackwardComputeZeta(
 /// $\zeta_t(j) = {\sum_k exp(\delta_{t-1}(k) + a_{kj}(t))}$.
 template <class ElemType>
 __global__ void _rcrfTransGrdComputeZeta(
-const int t, /// time position
+const int t, // time position
 const size_t iNumPos,
-const ElemType* galpha, /// column slice at current time t
-ElemType* gzeta, /// column slices with [row, 2] at current time t for [
+const ElemType* galpha, // column slice at current time t
+ElemType* gzeta, // column slices with [row, 2] at current time t for [
 const ElemType* gpair_scores,
 const size_t iNumLab,
 const size_t start_lbl,

@@ -4769,8 +4769,8 @@ __global__ void _rcrfTransGrdComputeZeta(
 {
 int id = blockDim.x * blockIdx.x + threadIdx.x;

-extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
-/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
+extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
+// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)

 ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
 ElemType pair_scores[1024];

@@ -4778,7 +4778,7 @@ __global__ void _rcrfTransGrdComputeZeta(
 if (id < 0 || id >= iNumLab)
 return;

-/// copy global memory to shared memory to save time
+// copy global memory to shared memory to save time
 if (t >= 0)
 alpha[id] = galpha[IDX2C(id, t, iNumLab)];

@@ -4823,8 +4823,8 @@ __global__ void _rcrfTransGrdCompute(
 {
 int id = blockDim.x * blockIdx.x + threadIdx.x;

-extern __shared__ double sh_alpha_and_beta[]; /// intersting, has to use [], instead of *
-/// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)
+extern __shared__ double sh_alpha_and_beta[]; // intersting, has to use [], instead of *
+// need bye size = (iNumPos * iNumLab * 2 + iNumLab * iNumLab) * sizeof(ElemType)

 ElemType* alpha = (ElemType*) (sh_alpha_and_beta);
 ElemType* beta = (ElemType*) (alpha + iNumLab);

@@ -4834,7 +4834,7 @@ __global__ void _rcrfTransGrdCompute(
 if (id < 0 || id >= iNumLab)
 return;

-/// copy global memory to shared memory to save time
+// copy global memory to shared memory to save time
 if (t > 0)
 alpha[id] = galpha[IDX2C(id, t - 1, iNumLab)];
 beta[id] = gbeta[IDX2C(id, t, iNumLab)];

@@ -4897,7 +4897,7 @@ __global__ void _reductionLogAddSum(

 __syncthreads();

-/// do reduction on the shared memory
+// do reduction on the shared memory
 size_t start_width = ceil((N + 0.0) / 2.0);
 for (size_t s = start_width; s > 0; s >>= 1)
 {

@@ -548,7 +548,7 @@ public:
 const Matrix<ElemType>& beta,
 const Matrix<ElemType>& pair_scores,
 Matrix<ElemType>& grd,
-const int startLbl, /// the time 0 start symbol in the output layer
+const int startLbl, // the time 0 start symbol in the output layer
 const int shift);

 template <typename T>

@@ -35,7 +35,7 @@ private:
 // this is used to prevent CUDA out-of memory errors

 vector<size_t> m_numFramesToProcess; // [seq index] number of frames available (left to return) in each parallel sequence
-vector<size_t> m_switchFrame; /// TODO: something like the position where a new sequence starts; still supported?
+vector<size_t> m_switchFrame; // TODO: something like the position where a new sequence starts; still supported?
 vector<size_t> m_numValidFrames; // [seq index] valid #frames in each parallel sequence. Frames (s, t) with t >= m_numValidFrames[s] are NoInput.
 vector<size_t> m_extraSeqsPerMB;
 size_t m_extraNumSeqs;

@@ -132,10 +132,10 @@ private:
 public:
 MBLayoutPtr m_pMBLayout;

-/// by default it is false
-/// if true, reader will set to ((int) MinibatchPackingFlags::None) for time positions that are orignally correspond to ((int) MinibatchPackingFlags::SequenceStart)
-/// set to true so that a current minibatch can uses state activities from the previous minibatch.
-/// default will have truncated BPTT, which only does BPTT inside a minibatch
+// by default it is false
+// if true, reader will set to ((int) MinibatchPackingFlags::None) for time positions that are orignally correspond to ((int) MinibatchPackingFlags::SequenceStart)
+// set to true so that a current minibatch can uses state activities from the previous minibatch.
+// default will have truncated BPTT, which only does BPTT inside a minibatch
 bool mIgnoreSentenceBeginTag;
 // TODO: this ^^ does not seem to belong here.

@@ -160,10 +160,10 @@ private:
 public:
 MBLayoutPtr m_pMBLayout;

-/// by default it is false
-/// if true, reader will set to SEQUENCE_MIDDLE for time positions that are orignally correspond to SEQUENCE_START
-/// set to true so that a current minibatch can uses state activities from the previous minibatch.
-/// default will have truncated BPTT, which only does BPTT inside a minibatch
+// by default it is false
+// if true, reader will set to SEQUENCE_MIDDLE for time positions that are orignally correspond to SEQUENCE_START
+// set to true so that a current minibatch can uses state activities from the previous minibatch.
+// default will have truncated BPTT, which only does BPTT inside a minibatch
 bool mIgnoreSentenceBeginTag;
 HTKMLFReader()
 : m_pMBLayout(make_shared<MBLayout>())

@@ -576,7 +576,7 @@ public:
 long orgRecordCount = (long) labels->size();
 long lineCount = 0;
 SequencePosition sequencePositionLast(0, 0, seqFlagNull);
-/// get line
+// get line
 char ch2[MAXSTRING];
 if (mFile == nullptr)
 Microsoft::MSR::CNTK::RuntimeError("File %ls can not be loaded\n", mFileName.c_str());

@@ -157,7 +157,7 @@ bool SequenceReader<ElemType>::EnsureDataAvailable(size_t mbStartSample, bool /*
 bSentenceStart = true;

 // loop through the labels for this entry
-while (label < spos.labelPos) /// need to minus one since
+while (label < spos.labelPos) // need to minus one since
 {

 // labelIn should be a category label

@@ -184,7 +184,7 @@ bool SequenceReader<ElemType>::EnsureDataAvailable(size_t mbStartSample, bool /*
 }

 if (!_stricmp(labelValue.c_str(), m_labelInfo[labelInfoIn].endSequence.c_str()))
-continue; /// ignore sentence ending
+continue; // ignore sentence ending
 }

 // to-do, should ignore <s>, check the sentence ending is </s>

@@ -265,7 +265,7 @@ bool SequenceReader<ElemType>::EnsureDataAvailable(size_t mbStartSample, bool /*
 RuntimeError("cannot find sentence begining label");

 if (m_labelIdData[jEnd] != index)
-/// for language model, the first word/letter has to be <s>
+// for language model, the first word/letter has to be <s>
 RuntimeError("SequenceReader: the last letter/word of a batch has to be the sentence ending symbol");
 }
 }

@@ -560,7 +560,7 @@ void SequenceReader<ElemType>::InitFromConfig(const ConfigRecordType& readerConf
 const LabelInfo& labelOut = m_labelInfo[labelInfoOut];
 m_parser.ParseInit(m_file.c_str(), m_featureDim, labelIn.dim, labelOut.dim, labelIn.beginSequence, labelIn.endSequence, labelOut.beginSequence, labelOut.endSequence);

-/// read unk sybol
+// read unk sybol
 mUnk = readerConfig(L"unk", "<unk>");
 }

@@ -662,7 +662,7 @@ void SequenceReader<ElemType>::ReadClassInfo(const wstring& vocfile, int& class_
 counts[p.first] = (double) p.second;
 m_noiseSampler = noiseSampler<long>(counts);

-/// check if unk is the same used in vocabulary file
+// check if unk is the same used in vocabulary file
 if (word4idx.find(mUnk.c_str()) == word4idx.end())
 {
 LogicError("SequenceReader::ReadClassInfo unk symbol %s is not in vocabulary file", mUnk.c_str());

@@ -916,9 +916,9 @@ void SequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, s
 else if (m_labelInfo[labelInfoOut].type != labelNone)
 m_labelData.reserve(epochSize);
 m_sequence.reserve(m_seqIndex); // clear out the sequence array
-/// this is too complicated for LM
+// this is too complicated for LM
 // SetupEpoch();
-/// use the LMSetupEpoch() instead
+// use the LMSetupEpoch() instead
 LMSetupEpoch();

 m_clsinfoRead = false;

@@ -1034,9 +1034,9 @@ void SequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring, Matrix<Elem
 if (class_size > 0)
 {
 labels->SetValue(1, j, (ElemType) clsidx);
-/// save the [begining ending_indx) of the class
-labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); /// begining index of the class
-labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); /// end index of the class
+// save the [begining ending_indx) of the class
+labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); // begining index of the class
+labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); // end index of the class
 }
 }
 }

@@ -1130,7 +1130,7 @@ void SequenceReader<ElemType>::GetClassInfo()
 }
 else if (prvcls > clsidx)
 {
-/// nwords is larger than the actual number of words
+// nwords is larger than the actual number of words
 LogicError("LMSequenceReader::GetClassInfo probably the number of words specified is larger than the actual number of words. Check network builder and data reader. ");
 }
 }

@@ -1414,7 +1414,7 @@ void BatchSequenceReader<ElemType>::InitFromConfig(const ConfigRecordType& reade
 else
 LogicError("unsupported format %ls", mode.c_str());

-/// read unk sybol
+// read unk sybol
 this->mUnk = msra::strfun::utf8(readerConfig(L"unk", L"<unk>"));

 class_size = 0;

@@ -1660,9 +1660,9 @@ void BatchSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epo
 else if (m_labelInfo[labelInfoOut].type != labelNone)
 m_labelData.reserve(epochSize);
 m_sequence.reserve(m_seqIndex); // clear out the sequence array
-/// this is too complicated for LM
+// this is too complicated for LM
 // SetupEpoch();
-/// use the LMSetupEpoch() instead
+// use the LMSetupEpoch() instead
 LMSetupEpoch();

 m_clsinfoRead = false;

@@ -1772,7 +1772,7 @@ bool BatchSequenceReader<ElemType>::EnsureDataAvailable(size_t /*mbStartSample*/
 sLn = FindNextSentences(mNumRead);
 }

-/// add one minibatch
+// add one minibatch
 firstPosInSentence = mLastPosInSentence;
 size_t i = mLastPosInSentence;
 size_t j = 0;

@@ -2105,7 +2105,7 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,

 labels->SetValue(1, j, (ElemType) clsidx);

-/// save the [begining ending_indx) of the class
+// save the [begining ending_indx) of the class
 size_t lft = (size_t) (*m_classInfoLocal)(0, clsidx);
 size_t rgt = (size_t) (*m_classInfoLocal)(1, clsidx);
 if (wrd < lft || lft > rgt || wrd >= rgt)

@@ -2113,8 +2113,8 @@ void BatchSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
 LogicError("LMSequenceReader::GetLabelOutput word %d should be at least equal to or larger than its class's left index %d; right index %d of its class should be larger or equal to left index %d of its class; word index %d should be smaller than its class's right index %d.\n",
 (int) wrd, (int) lft, (int) rgt, (int) lft, (int) wrd, (int) rgt);
 }
-labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); /// begining index of the class
-labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); /// end index of the class
+labels->SetValue(2, j, (*m_classInfoLocal)(0, clsidx)); // begining index of the class
+labels->SetValue(3, j, (*m_classInfoLocal)(1, clsidx)); // end index of the class
 }
 }
 else if (readerMode == ReaderMode::Softmax)

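In the class-based reader hunks above, each label column j carries the class index in row 1 and the class's word-id range [left, right) in rows 2 and 3, guarded by the check left <= wrd < rgt. A minimal sketch of filling that column-major layout (illustrative; that row 0 holds the word index itself is an assumption, since row 0 is not shown in these hunks):

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    struct ClassRange { std::size_t left, right; }; // word ids of one class: [left, right)

    // Fill one label column for class-based LM training; labels is a column-major
    // [numRows x numCols] buffer with numRows >= 4.
    void setLabelColumn(std::vector<float>& labels, std::size_t numRows, std::size_t j,
                        std::size_t wrd, std::size_t clsidx, const ClassRange& r)
    {
        if (wrd < r.left || wrd >= r.right)
            throw std::logic_error("word id outside its class range"); // the guard above
        labels[0 + j * numRows] = (float) wrd;     // assumption: word index in row 0
        labels[1 + j * numRows] = (float) clsidx;  // class index
        labels[2 + j * numRows] = (float) r.left;  // beginning index of the class
        labels[3 + j * numRows] = (float) r.right; // end index of the class
    }
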
@@ -141,7 +141,7 @@ public:
 ReaderMode readerMode;
 int eos_idx, unk_idx;

-string mUnk; /// unk symbol
+string mUnk; // unk symbol

 public:
 // typedef std::string LabelType;

@@ -158,7 +158,7 @@ protected:
 size_t m_totalSamples; // number of samples in the dataset
 size_t m_featureDim; // feature dimensions for extra features
 size_t m_featureCount; // total number of non-zero features (in labelsDim + extra features dim)
-/// for language modeling, the m_featureCount = 1, since there is only one nonzero element
+// for language modeling, the m_featureCount = 1, since there is only one nonzero element
 size_t m_readNextSampleLine; // next sample to read Line
 size_t m_readNextSample; // next sample to read
 size_t m_seqIndex; // index into the m_sequence array

@@ -413,7 +413,7 @@ public:
 }
 void Reset();

-/// return length of sentences size
+// return length of sentences size
 size_t FindNextSentences(size_t numSentences);
 bool DataEnd(EndDataType endDataType);
 void SetSentenceEnd(int wrd, int pos, int actualMbSize);

@@ -48,7 +48,7 @@ void LMSequenceWriter<ElemType>::InitFromConfig(const ConfigRecordType& writerCo
 int iN = thisOutput(L"nbest", 1);
 nBests[outputNames[i]] = iN;
 wstring fname = thisOutput(L"token");
-/// read unk sybol
+// read unk sybol
 mUnk[outputNames[i]] = writerConfig(L"unk", "<unk>");

 SequenceReader<ElemType>::ReadClassInfo(fname, class_size,

@@ -28,7 +28,7 @@ private:
 map<wstring, map<int, size_t>> idx4cnt;
 int nwords;

-map<wstring, string> mUnk; /// unk symbol
+map<wstring, string> mUnk; // unk symbol

 int noise_sample_size;
 noiseSampler<long> m_noiseSampler;

@@ -57,7 +57,7 @@ long BatchLUSequenceParser<NumType, LabelType>::Parse(size_t recordsRequested, s
 long orgRecordCount = (long) labels->size();
 long lineCount = 0;
 long tokenCount = 0;
-bool bAtEOS = false; /// whether the reader is at the end of sentence position
+bool bAtEOS = false; // whether the reader is at the end of sentence position
 SequencePosition sequencePositionLast(0, 0, 0);

 wstring ch;

@@ -70,7 +70,7 @@ long BatchLUSequenceParser<NumType, LabelType>::Parse(size_t recordsRequested, s
 {
 if (canMultiplePassData)
 {
-ParseReset(); /// restart from the corpus begining
+ParseReset(); // restart from the corpus begining
 continue;
 }
 else

@@ -118,7 +118,7 @@ long BatchLUSequenceParser<NumType, LabelType>::Parse(size_t recordsRequested, s
 labels->push_back(outputlabel2id.find(vstr[vstr.size() - 1])->second);
 input->push_back(vtmp);
 if ((vstr[vstr.size() - 1] == m_endSequenceOut ||
-/// below is for backward support
+// below is for backward support
 vstr[0] == m_endTag) &&
 input->size() > 0 && labels->size() > 0)
 {

@ -511,7 +511,7 @@ void BatchLUSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t e
|
|||
|
||||
Reset();
|
||||
|
||||
m_parser.ParseReset(); /// restart from the corpus beginning
|
||||
m_parser.ParseReset(); // restart from the corpus beginning
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -585,7 +585,7 @@ size_t BatchLUSequenceReader<ElemType>::FindNextSentences(size_t numRead)
|
|||
mToProcess.push_back(seq);
|
||||
mMaxSentenceLength = max((int) mMaxSentenceLength, ln);
|
||||
if (previousLn == -1)
|
||||
mLastProcessedSentenceId = seq + 1; /// update index for the next retrieval
|
||||
mLastProcessedSentenceId = seq + 1; // update index for the next retrieval
|
||||
previousLn = ln;
|
||||
}
|
||||
}
|
||||
|
@ -952,8 +952,8 @@ size_t BatchLUSequenceReader<ElemType>::GetLabelOutput(std::map<std::wstring,
|
|||
ElemType rgt = (*labelInfo.m_classInfoLocal)(1, clsidx);
|
||||
if (rgt <= lft)
|
||||
LogicError("LUSequenceReader : right is equal or smaller than the left, which is wrong.");
|
||||
labels->SetValue(2, j, lft); /// beginning index of the class
|
||||
labels->SetValue(3, j, rgt); /// end index of the class
|
||||
labels->SetValue(2, j, lft); // beginning index of the class
|
||||
labels->SetValue(3, j, rgt); // end index of the class
|
||||
}
|
||||
else
|
||||
LogicError("LUSequenceReader: reader mode is not set to Plain. Or in the case of setting it to Class, the class number is 0. ");
|
||||
|
@ -1094,7 +1094,7 @@ bool BatchLUSequenceReader<ElemType>::GetFrame(std::map<std::wstring, Matrix<Ele
|
|||
{
|
||||
int cxt = m_wordContext[jj];
|
||||
|
||||
/// assert that wordContext is organized as descending order
|
||||
// assert that wordContext is organized as descending order
|
||||
assert((jj == m_wordContext.size() - 1) ? true : cxt > m_wordContext[jj + 1]);
|
||||
|
||||
size_t hidx;
|
||||
|
@ -1134,7 +1134,7 @@ void BatchLUSequenceReader<ElemType>::InitProposals(map<wstring, Matrix<ElemType
|
|||
{
|
||||
if (m_labelInfo[labelInfoIn].isproposal)
|
||||
{
|
||||
/// no need to save info for labelInfoIn since it is in mProposals
|
||||
// no need to save info for labelInfoIn since it is in mProposals
|
||||
if (pMat.find(m_labelsName[labelInfoOut]) != pMat.end())
|
||||
mMatrices[m_labelsName[labelInfoOut]].SetValue(*(pMat[m_labelsName[labelInfoOut]]));
|
||||
}
|
||||
|
@ -1184,7 +1184,7 @@ template class BatchLUSequenceReader<float>;
|
|||
template <class ElemType>
|
||||
bool MultiIOBatchLUSequenceReader<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices)
|
||||
{
|
||||
/// on first iteration, need to check if all requested data matrices are available
|
||||
// on first iteration, need to check if all requested data matrices are available
|
||||
std::map<std::wstring, size_t>::iterator iter;
|
||||
if (mCheckDictionaryKeys)
|
||||
{
|
||||
|
@@ -1207,14 +1207,14 @@ bool MultiIOBatchLUSequenceReader<ElemType>::GetMinibatch(std::map<std::wstring,
mCheckDictionaryKeys = false;
}

/// set the same random seed
// set the same random seed
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
{
p->second->SetRandomSeed(this->m_seed);
}
this->m_seed++;

/// run for each reader
// run for each reader
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
{
if ((p->second)->GetMinibatch(matrices) == false)

@@ -1242,7 +1242,7 @@ void MultiIOBatchLUSequenceReader<ElemType>::InitFromConfig(const ConfigRecordTy
vector<wstring> ioNames = readerConfig(L"ioNodeNames", ConfigRecordType::Array(stringargvector()));
if (ioNames.size() > 0)
{
/// newer code that explicitly places multiple streams for inputs
// newer code that explicitly places multiple streams for inputs
foreach_index (i, ioNames) // inputNames should map to node names
{
const ConfigRecordType& thisIO = readerConfig(ioNames[i]);

@@ -1257,7 +1257,7 @@ void MultiIOBatchLUSequenceReader<ElemType>::InitFromConfig(const ConfigRecordTy
}
else
{
/// older code that assumes only one stream of features
// older code that assumes only one stream of features
BatchLUSequenceReader<ElemType>* thisReader = new BatchLUSequenceReader<ElemType>();

thisReader->Init(readerConfig);

@@ -1271,7 +1271,7 @@ void MultiIOBatchLUSequenceReader<ElemType>::InitFromConfig(const ConfigRecordTy
template <class ElemType>
void MultiIOBatchLUSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples)
{
/// run for each reader
// run for each reader
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
{
(p->second)->StartMinibatchLoop(mbSize, epoch, requestedEpochSamples);

@@ -1281,7 +1281,7 @@ void MultiIOBatchLUSequenceReader<ElemType>::StartMinibatchLoop(size_t mbSize, s
template <class ElemType>
void MultiIOBatchLUSequenceReader<ElemType>::CopyMBLayoutTo(MBLayoutPtr pMBLayout)
{
/// run for each reader
// run for each reader
vector<size_t> col;
size_t rows = 0, cols = 0;
for (const auto& p : mReader)

@@ -1332,7 +1332,7 @@ bool MultiIOBatchLUSequenceReader<ElemType>::DataEnd(EndDataType endDataType)
template <class ElemType>
bool MultiIOBatchLUSequenceReader<ElemType>::GetProposalObs(std::map<std::wstring, Matrix<ElemType>*>& matrices, const size_t tidx, vector<size_t>& history)
{
/// run for each reader
// run for each reader
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
{
if ((p->second)->GetFrame(matrices, tidx, history) == false)

@@ -1348,7 +1348,7 @@ bool MultiIOBatchLUSequenceReader<ElemType>::GetProposalObs(std::map<std::wstrin
template <class ElemType>
void MultiIOBatchLUSequenceReader<ElemType>::InitProposals(std::map<std::wstring, Matrix<ElemType>*>& matrices)
{
/// run for each reader
// run for each reader
for (typename map<wstring, BatchLUSequenceReader<ElemType>*>::iterator p = mReader.begin(); p != mReader.end(); p++)
{
(p->second)->InitProposals(matrices);

@@ -64,13 +64,13 @@ public:
bool mRandomize;

public:
/// deal with OOV
// deal with OOV
map<LabelType, LabelType> mWordMapping;
wstring mWordMappingFn;
LabelType mUnkStr;

public:
/// accumulated number of sentences read so far
// accumulated number of sentences read so far
unsigned long mTotalSentenceSofar;

protected:

@@ -83,7 +83,7 @@ protected:
size_t m_totalSamples; // number of samples in the dataset
size_t m_featureDim; // feature dimensions for extra features
size_t m_featureCount; // total number of non-zero features (in labelsDim + extra features dim)
/// for language modeling, the m_featureCount = 1, since there is only one nonzero element
// for language modeling, the m_featureCount = 1, since there is only one nonzero element
size_t m_readNextSampleLine; // next sample to read Line
size_t m_readNextSample; // next sample to read
size_t m_seqIndex; // index into the m_sequence array

@@ -130,11 +130,11 @@ protected:
long dim; // maximum label ID we will ever see (used for array dimensions)
LabelType beginSequence; // starting sequence string (i.e. <s>)
LabelType endSequence; // ending sequence string (i.e. </s>)
bool busewordmap; /// whether using wordmap to map unseen words to unk
bool busewordmap; // whether using wordmap to map unseen words to unk
std::wstring mapName;
std::wstring fileToWrite; // set to the path if we need to write out the label file

bool isproposal; /// whether this is for proposal generation
bool isproposal; // whether this is for proposal generation

ReaderMode readerMode;
/**

@@ -298,7 +298,7 @@ public:
}
void Reset();

/// return length of sentences
// return length of sentences
size_t FindNextSentences(size_t numSentences);
bool DataEnd(EndDataType endDataType);
void SetSentenceEnd(int wrd, int pos, int actualMbSize);

@@ -345,7 +345,7 @@ public:

template <class ConfigRecordType>
void LoadWordMapping(const ConfigRecordType& config);
bool CanReadFor(wstring nodeName); /// return true if this reader can output for a node with name nodeName
bool CanReadFor(wstring nodeName); // return true if this reader can output for a node with name nodeName

vector<size_t> ReturnToProcessId()
{

@@ -365,12 +365,12 @@ public:
/**
for sequential reading data, useful for beam search decoding
*/
/// this is for frame-by-frame reading of data.
/// data is first read into these matrices and then if needed is column-by-column retrieved
// this is for frame-by-frame reading of data.
// data is first read into these matrices and then if needed is column-by-column retrieved
map<wstring, Matrix<ElemType>> mMatrices;
bool GetFrame(std::map<std::wstring, Matrix<ElemType>*>& matrices, const size_t tidx, vector<size_t>& history);

/// create proposals
// create proposals
void InitProposals(map<wstring, Matrix<ElemType>*>& pMat);

public:

@@ -443,7 +443,7 @@ public:
int GetSentenceEndIdFromOutputLabel();
bool DataEnd(EndDataType endDataType);

/// create proposals
// create proposals
void InitProposals(map<wstring, Matrix<ElemType>*>& pMat);
bool GetProposalObs(std::map<std::wstring, Matrix<ElemType>*>& matrices, const size_t tidx, vector<size_t>& history);
};

@@ -657,7 +657,7 @@ void UCIFastReader<ElemType>::StartDistributedMinibatchLoop(size_t mbSize, size_
m_subsetNum = subsetNum;
m_numSubsets = numSubsets;
if (mOneLinePerFile)
mbSize = mRequestedNumParallelSequences; /// each file has only one observation, so the number of samples to read is the number of files
mbSize = mRequestedNumParallelSequences; // each file has only one observation, so the number of samples to read is the number of files

// if we aren't currently caching, see if we can use a cache
if (!m_cachingReader && !m_cachingWriter)

@@ -2350,7 +2350,7 @@ bool SGD<ElemType>::GradientCheck(ComputationNetworkPtr net,

for (size_t itry = 0; itry < min((size_t) 50, node->Value().GetNumElements()); itry++)
{
/// no support for sparse matrices yet
// no support for sparse matrices yet
int irow = (int) fmod(rand(), node->Gradient().GetNumRows() - 1);
int icol = (int) fmod(rand(), node->Gradient().GetNumCols() - 1);
irow = max(0, irow);

@@ -2584,8 +2584,8 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_L2RegWeight = configSGD(L"L2RegWeight", 0.0);
m_L1RegWeight = configSGD(L"L1RegWeight", 0.0);

/// for backward support. future setup should use gradUpdateType=AdaGrad, instead of
/// useAdagrad=true
// for backward support. future setup should use gradUpdateType=AdaGrad, instead of
// useAdagrad=true
bool useAdagrad = configSGD(L"useAdagrad", false);
if (useAdagrad)
{

@@ -2596,7 +2596,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_adaptationRegType = ParseAdaptationRegType(configSGD(L"adaptationRegType", L"None"));
m_adaptationRegWeight = configSGD(L"adaptationRegWeight", 0.0);

/// gradient check setup
// gradient check setup
m_doGradientCheck = configSGD(L"gradientcheck", false);
m_gradientCheckSigDigit = configSGD(L"sigFigs", 6.0); // TODO: why is this a double?

@@ -101,8 +101,8 @@ public:

totalEpochSamples += actualMBSize;

/// call DataEnd function in dataReader to do
/// reader specific process if sentence ending is reached
// call DataEnd function in dataReader to do
// reader specific process if sentence ending is reached
dataReader.DataEnd(endDataSentence);
}

@@ -26,11 +26,11 @@ void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,

assert(nStream == sentenceBegin.GetNumRows());

/// only set state to init state value for segmentation = 0, and -1
/// e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0
// only set state to init state value for segmentation = 0, and -1
// e.g., -1 0 1 -> 0 0 1 -> 0 0 -1 -> 1 1 0

Matrix<ElemType> colPos(sentenceBegin.GetDeviceId());
colPos.SetValue(sentenceBegin); /// -1 0 1
colPos.SetValue(sentenceBegin); // -1 0 1
colPos.InplaceTruncateBottom(1 << 0 /*(int)MinibatchPackingFlags::SequenceStart*/); // TODO: these flags no longer exist, this test probably no longer applies
Matrix<ElemType>::Scale((ElemType) -1.0, colPos);
colPos += 0; // (int)MinibatchPackingFlags::None; // TODO: these flags no longer exist, this test probably no longer applies

@@ -38,8 +38,8 @@ void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,
Matrix<ElemType> ones(sentenceBegin.GetDeviceId());
ones.Resize(nStateRow, nStream);
ones.SetValue((ElemType) 1);
/// add default state value if it is for reset
Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); /// += [0 initStateValue 0 ]
// add default state value if it is for reset
Matrix<ElemType>::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); // += [0 initStateValue 0 ]
}

template <class ElemType>

@@ -107,7 +107,7 @@ void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool
int d = iPastIndex;
if (d < 0)
d = (int) functionValues.Mod((float) iPastIndex, (float) pastActivity.GetNumCols());
/// this can point to the past activity of the previous minibatch
// this can point to the past activity of the previous minibatch

Matrix<ElemType> out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + indexInBatch, 1);
Matrix<ElemType> inp((DEVICEID_TYPE) functionValues.GetDeviceId());