Add a node-specific function MaskToZeroWhenLabelAndFeatureMissing in ClassBasedcrossentropynode. No test cases yet given.

This commit is contained in:
kaisheny 2015-06-19 21:04:08 -07:00
Родитель 8003d24f2b
Коммит 5a1175de68
2 изменённых файлов: 72 добавлений и 22 удалений

Просмотреть файл

@ -261,8 +261,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/**
reset to error signals to 0 for any elements without labele
*/
void MaskToZeroWhenLabelAndFeatureMissing(Matrix<ElemType>& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1)
bool MaskToZeroWhenLabelAndFeatureMissing(Matrix<ElemType>& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1)
{
bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed
if (m_sentenceSeg != nullptr && m_existsSentenceBeginOrNoLabels != nullptr && !m_sentenceSeg->IsEmpty() && !m_existsSentenceBeginOrNoLabels->IsEmpty())
{
size_t nT = matrixToBeMasked.GetNumCols();
@ -292,9 +294,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
colSeg.InplaceTruncateTop(SENTENCE_MIDDLE); // change to 0 1 1
colSeg.Reshape(1, nS);
matrixToBeMasked.ColumnSlice(utt_t, nS).RowElementMultiplyWith(colSeg);
processedExistsNoLabelorFeatureMissing = true;
}
}
}
return processedExistsNoLabelorFeatureMissing;
}
void SetLoopId(const int id)

Просмотреть файл

@ -1274,7 +1274,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(),
Inputs(3)->FunctionValues(), m_logSoftmax, m_softMax, m_clsLogSoftmax, m_clsSoftmax, m_totalNbrWords);
Inputs(3)->FunctionValues(), m_logSoftmax, m_softMax, m_clsLogSoftmax, m_clsSoftmax, m_totalNbrWords, this);
m_needRecomputeGradientToSoftmaxInput = true;
}
@ -1286,7 +1286,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
const Matrix<ElemType>& inputs, const Matrix<ElemType>& input_weight, const Matrix<ElemType>& input_cls_log_post_prob,
Matrix<ElemType>& logSoftmax,
Matrix<ElemType>& softMax, Matrix<ElemType>& clsLogSoftmax, Matrix<ElemType>& clsSoftmax, size_t& totalWords)
Matrix<ElemType>& softMax,
Matrix<ElemType>& clsLogSoftmax, Matrix<ElemType>& clsSoftmax, size_t& totalWords, ClassBasedCrossEntropyWithSoftmaxNode* curNode)
{
totalWords = 0;
size_t nT = lbls.GetNumCols();
@ -1339,33 +1340,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/// W x_t
Matrix<ElemType> softMax_t = softMax.ColumnSlice(sz, nbr_wrd);
Matrix<ElemType> logSoftMax_t = logSoftmax.ColumnSlice(sz, nbr_wrd);
Matrix<ElemType> obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1
obs.Reshape(1, nRow); /// 1 x 200
logSoftMax_t.AssignProductOf(obs, false, weightForClass, false); /// 1 x 148
if (curNode->MaskToZeroWhenLabelAndFeatureMissing(logSoftMax_t, t) == false)
{
Matrix<ElemType> obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1
obs.Reshape(1, nRow); /// 1 x 200
// log softmax(W x_t)
logSoftMax_t.InplaceLogSoftmax(false); /// 1 x 148
softMax_t.SetValue(logSoftMax_t);
// softmax(W x_t)
softMax_t.InplaceExp(); /// 1 x 148
logSoftMax_t.AssignProductOf(obs, false, weightForClass, false); /// 1 x 148
/// add the word log posterior probability
if (y_t < lft_bnd)
LogicError("ClassBasedCrossEntropyWithSoftmax::EvaluateThisNodeS : the word index is smaller than its left bound of its class. This could happen because of reader issues. ");
// log softmax(W x_t)
logSoftMax_t.InplaceLogSoftmax(false); /// 1 x 148
softMax_t.SetValue(logSoftMax_t);
// softmax(W x_t)
softMax_t.InplaceExp(); /// 1 x 148
size_t idx_in_class = y_t - lft_bnd;
Matrix<ElemType>::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0);
/// add the word log posterior probability
if (y_t < lft_bnd)
LogicError("ClassBasedCrossEntropyWithSoftmax::EvaluateThisNodeS : the word index is smaller than its left bound of its class. This could happen because of reader issues. ");
size_t idx_in_class = y_t - lft_bnd;
Matrix<ElemType>::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0);
}
/// add the class log posterior probability
try{
Matrix<ElemType>::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0);
}
catch (...)
if (curNode->MaskToZeroWhenLabelAndFeatureMissing(clsLogSoftmax, t) == false)
{
fprintf(stderr, "EvaluateThisNodeS for ClassBasedCrossEntropyWithSoftmaxNode : number of classes is smaller than the dimension to read. Check network builder such as nbrClass and vocabulary file with class index to see if the number of classes and the maximum class index match. The right number should be number of classes == maximum class index number + 1\n");
throw;
try{
Matrix<ElemType>::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0);
}
catch (...)
{
fprintf(stderr, "EvaluateThisNodeS for ClassBasedCrossEntropyWithSoftmaxNode : number of classes is smaller than the dimension to read. Check network builder such as nbrClass and vocabulary file with class index to see if the number of classes and the maximum class index match. The right number should be number of classes == maximum class index number + 1\n");
throw;
}
}
sz += nbr_wrd;
}
@ -1376,6 +1385,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}
/**
reset to error signals to 0 for any elements without labels
*/
bool MaskToZeroWhenLabelAndFeatureMissing(Matrix<ElemType>& matrixToBeMasked, const size_t t)
{
bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed
if (m_sentenceSeg != nullptr && m_existsSentenceBeginOrNoLabels != nullptr
&& !m_sentenceSeg->IsEmpty() && !m_existsSentenceBeginOrNoLabels->IsEmpty())
{
size_t nS = m_sentenceSeg->GetNumRows();
if (m_existsSentenceBeginOrNoLabels->GetNumRows() != 1)
{
LogicError("MaskToZeroWhenLabelAndFeatureMissing: m_existsSentenceBeginOrNoLabels should be a one row matrix or a vector. ");
}
Matrix<ElemType> colSeg(m_sentenceSeg->GetDeviceId());
size_t j = t / nS;
size_t i = t % nS;
if (m_existsSentenceBeginOrNoLabels->ColumnSlice(j, 1).Get00Element() == EXISTS_SENTENCE_BEGIN_OR_NO_LABELS)
{
if ((*m_sentenceSeg)(j, i) == NO_LABELS)
{
matrixToBeMasked.ColumnSlice(t,1).SetValue(0);
processedExistsNoLabelorFeatureMissing = true;
}
}
}
return processedExistsNoLabelorFeatureMissing;
}
/**
Inputs: [0] label in dense matrix in [4 x T]
the first row is the word index, the second row is the class index, the third row is the first word index of the class