From 5a1175de6813f77e76c728fcc05da2340b132092 Mon Sep 17 00:00:00 2001 From: kaisheny Date: Fri, 19 Jun 2015 21:04:08 -0700 Subject: [PATCH] Add a node-specific function MaskToZeroWhenLabelAndFeatureMissing in ClassBasedCrossEntropyWithSoftmaxNode. No test cases yet given. --- MachineLearning/CNTK/ComputationNode.h | 8 +- MachineLearning/CNTK/TrainingCriterionNodes.h | 86 ++++++++++++++----- 2 files changed, 72 insertions(+), 22 deletions(-) diff --git a/MachineLearning/CNTK/ComputationNode.h b/MachineLearning/CNTK/ComputationNode.h index 38344b691..9b7462ab8 100644 --- a/MachineLearning/CNTK/ComputationNode.h +++ b/MachineLearning/CNTK/ComputationNode.h @@ -261,8 +261,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { /** reset to error signals to 0 for any elements without labele */ - void MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1) + bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1) { + bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed + if (m_sentenceSeg != nullptr && m_existsSentenceBeginOrNoLabels != nullptr && !m_sentenceSeg->IsEmpty() && !m_existsSentenceBeginOrNoLabels->IsEmpty()) { size_t nT = matrixToBeMasked.GetNumCols(); @@ -292,9 +294,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { colSeg.InplaceTruncateTop(SENTENCE_MIDDLE); // change to 0 1 1 colSeg.Reshape(1, nS); matrixToBeMasked.ColumnSlice(utt_t, nS).RowElementMultiplyWith(colSeg); + + processedExistsNoLabelorFeatureMissing = true; } } } + + return processedExistsNoLabelorFeatureMissing; } void SetLoopId(const int id) diff --git a/MachineLearning/CNTK/TrainingCriterionNodes.h b/MachineLearning/CNTK/TrainingCriterionNodes.h index c3d6c592b..a6742a29d 100644 --- a/MachineLearning/CNTK/TrainingCriterionNodes.h +++ b/MachineLearning/CNTK/TrainingCriterionNodes.h @@ -1274,7 +1274,7 @@ namespace Microsoft { 
namespace MSR { namespace CNTK { } EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), - Inputs(3)->FunctionValues(), m_logSoftmax, m_softMax, m_clsLogSoftmax, m_clsSoftmax, m_totalNbrWords); + Inputs(3)->FunctionValues(), m_logSoftmax, m_softMax, m_clsLogSoftmax, m_clsSoftmax, m_totalNbrWords, this); m_needRecomputeGradientToSoftmaxInput = true; } @@ -1286,7 +1286,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { static void EvaluateThisNodeS(Matrix& functionValues, const Matrix& lbls, const Matrix& inputs, const Matrix& input_weight, const Matrix& input_cls_log_post_prob, Matrix& logSoftmax, - Matrix& softMax, Matrix& clsLogSoftmax, Matrix& clsSoftmax, size_t& totalWords) + Matrix& softMax, + Matrix& clsLogSoftmax, Matrix& clsSoftmax, size_t& totalWords, ClassBasedCrossEntropyWithSoftmaxNode* curNode) { totalWords = 0; size_t nT = lbls.GetNumCols(); @@ -1339,33 +1340,41 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// W x_t Matrix softMax_t = softMax.ColumnSlice(sz, nbr_wrd); Matrix logSoftMax_t = logSoftmax.ColumnSlice(sz, nbr_wrd); - Matrix obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1 - obs.Reshape(1, nRow); /// 1 x 200 - logSoftMax_t.AssignProductOf(obs, false, weightForClass, false); /// 1 x 148 + if (curNode->MaskToZeroWhenLabelAndFeatureMissing(logSoftMax_t, t) == false) + { + Matrix obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1 + obs.Reshape(1, nRow); /// 1 x 200 - // log softmax(W x_t) - logSoftMax_t.InplaceLogSoftmax(false); /// 1 x 148 - softMax_t.SetValue(logSoftMax_t); - // softmax(W x_t) - softMax_t.InplaceExp(); /// 1 x 148 + logSoftMax_t.AssignProductOf(obs, false, weightForClass, false); /// 1 x 148 - /// add the word log posterior probability - if (y_t < lft_bnd) - LogicError("ClassBasedCrossEntropyWithSoftmax::EvaluateThisNodeS : the word index is smaller than its left bound of its class. This could happen because of reader issues. 
"); + // log softmax(W x_t) + logSoftMax_t.InplaceLogSoftmax(false); /// 1 x 148 + softMax_t.SetValue(logSoftMax_t); + // softmax(W x_t) + softMax_t.InplaceExp(); /// 1 x 148 - size_t idx_in_class = y_t - lft_bnd; - Matrix::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0); + /// add the word log posterior probability + if (y_t < lft_bnd) + LogicError("ClassBasedCrossEntropyWithSoftmax::EvaluateThisNodeS : the word index is smaller than its left bound of its class. This could happen because of reader issues. "); + + size_t idx_in_class = y_t - lft_bnd; + Matrix::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0); + } /// add the class log posterior probability - try{ - Matrix::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0); - } - catch (...) + if (curNode->MaskToZeroWhenLabelAndFeatureMissing(clsLogSoftmax, t) == false) { - fprintf(stderr, "EvaluateThisNodeS for ClassBasedCrossEntropyWithSoftmaxNode : number of classes is smaller than the dimension to read. Check network builder such as nbrClass and vocabulary file with class index to see if the number of classes and the maximum class index match. The right number should be number of classes == maximum class index number + 1\n"); - throw; + try{ + Matrix::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0); + } + catch (...) + { + fprintf(stderr, "EvaluateThisNodeS for ClassBasedCrossEntropyWithSoftmaxNode : number of classes is smaller than the dimension to read. Check network builder such as nbrClass and vocabulary file with class index to see if the number of classes and the maximum class index match. 
The right number should be number of classes == maximum class index number + 1\n"); + throw; + } } + sz += nbr_wrd; } @@ -1376,6 +1385,41 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif } + /** + reset to error signals to 0 for any elements without labels + */ + bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t t) + { + bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed + + if (m_sentenceSeg != nullptr && m_existsSentenceBeginOrNoLabels != nullptr + && !m_sentenceSeg->IsEmpty() && !m_existsSentenceBeginOrNoLabels->IsEmpty()) + { + size_t nS = m_sentenceSeg->GetNumRows(); + + if (m_existsSentenceBeginOrNoLabels->GetNumRows() != 1) + { + LogicError("MaskToZeroWhenLabelAndFeatureMissing: m_existsSentenceBeginOrNoLabels should be a one row matrix or a vector. "); + } + + Matrix colSeg(m_sentenceSeg->GetDeviceId()); + + size_t j = t / nS; + size_t i = t % nS; + if (m_existsSentenceBeginOrNoLabels->ColumnSlice(j, 1).Get00Element() == EXISTS_SENTENCE_BEGIN_OR_NO_LABELS) + { + if ((*m_sentenceSeg)(j, i) == NO_LABELS) + { + matrixToBeMasked.ColumnSlice(t,1).SetValue(0); + + processedExistsNoLabelorFeatureMissing = true; + } + } + } + + return processedExistsNoLabelorFeatureMissing; + } + /** Inputs: [0] label in dense matrix in [4 x T] the first row is the word index, the second row is the class index, the third row is the first word index of the class