Add a node-specific function MaskToZeroWhenLabelAndFeatureMissing in ClassBasedcrossentropynode. No test cases yet given.
This commit is contained in:
Родитель
8003d24f2b
Коммит
5a1175de68
|
@ -261,8 +261,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
/**
|
||||
reset to error signals to 0 for any elements without labele
|
||||
*/
|
||||
void MaskToZeroWhenLabelAndFeatureMissing(Matrix<ElemType>& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1)
|
||||
bool MaskToZeroWhenLabelAndFeatureMissing(Matrix<ElemType>& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1)
|
||||
{
|
||||
bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed
|
||||
|
||||
if (m_sentenceSeg != nullptr && m_existsSentenceBeginOrNoLabels != nullptr && !m_sentenceSeg->IsEmpty() && !m_existsSentenceBeginOrNoLabels->IsEmpty())
|
||||
{
|
||||
size_t nT = matrixToBeMasked.GetNumCols();
|
||||
|
@ -292,9 +294,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
colSeg.InplaceTruncateTop(SENTENCE_MIDDLE); // change to 0 1 1
|
||||
colSeg.Reshape(1, nS);
|
||||
matrixToBeMasked.ColumnSlice(utt_t, nS).RowElementMultiplyWith(colSeg);
|
||||
|
||||
processedExistsNoLabelorFeatureMissing = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return processedExistsNoLabelorFeatureMissing;
|
||||
}
|
||||
|
||||
void SetLoopId(const int id)
|
||||
|
|
|
@ -1274,7 +1274,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(),
|
||||
Inputs(3)->FunctionValues(), m_logSoftmax, m_softMax, m_clsLogSoftmax, m_clsSoftmax, m_totalNbrWords);
|
||||
Inputs(3)->FunctionValues(), m_logSoftmax, m_softMax, m_clsLogSoftmax, m_clsSoftmax, m_totalNbrWords, this);
|
||||
m_needRecomputeGradientToSoftmaxInput = true;
|
||||
}
|
||||
|
||||
|
@ -1286,7 +1286,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
static void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& lbls,
|
||||
const Matrix<ElemType>& inputs, const Matrix<ElemType>& input_weight, const Matrix<ElemType>& input_cls_log_post_prob,
|
||||
Matrix<ElemType>& logSoftmax,
|
||||
Matrix<ElemType>& softMax, Matrix<ElemType>& clsLogSoftmax, Matrix<ElemType>& clsSoftmax, size_t& totalWords)
|
||||
Matrix<ElemType>& softMax,
|
||||
Matrix<ElemType>& clsLogSoftmax, Matrix<ElemType>& clsSoftmax, size_t& totalWords, ClassBasedCrossEntropyWithSoftmaxNode* curNode)
|
||||
{
|
||||
totalWords = 0;
|
||||
size_t nT = lbls.GetNumCols();
|
||||
|
@ -1339,33 +1340,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
/// W x_t
|
||||
Matrix<ElemType> softMax_t = softMax.ColumnSlice(sz, nbr_wrd);
|
||||
Matrix<ElemType> logSoftMax_t = logSoftmax.ColumnSlice(sz, nbr_wrd);
|
||||
Matrix<ElemType> obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1
|
||||
obs.Reshape(1, nRow); /// 1 x 200
|
||||
|
||||
logSoftMax_t.AssignProductOf(obs, false, weightForClass, false); /// 1 x 148
|
||||
if (curNode->MaskToZeroWhenLabelAndFeatureMissing(logSoftMax_t, t) == false)
|
||||
{
|
||||
Matrix<ElemType> obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1
|
||||
obs.Reshape(1, nRow); /// 1 x 200
|
||||
|
||||
// log softmax(W x_t)
|
||||
logSoftMax_t.InplaceLogSoftmax(false); /// 1 x 148
|
||||
softMax_t.SetValue(logSoftMax_t);
|
||||
// softmax(W x_t)
|
||||
softMax_t.InplaceExp(); /// 1 x 148
|
||||
logSoftMax_t.AssignProductOf(obs, false, weightForClass, false); /// 1 x 148
|
||||
|
||||
/// add the word log posterior probability
|
||||
if (y_t < lft_bnd)
|
||||
LogicError("ClassBasedCrossEntropyWithSoftmax::EvaluateThisNodeS : the word index is smaller than its left bound of its class. This could happen because of reader issues. ");
|
||||
// log softmax(W x_t)
|
||||
logSoftMax_t.InplaceLogSoftmax(false); /// 1 x 148
|
||||
softMax_t.SetValue(logSoftMax_t);
|
||||
// softmax(W x_t)
|
||||
softMax_t.InplaceExp(); /// 1 x 148
|
||||
|
||||
size_t idx_in_class = y_t - lft_bnd;
|
||||
Matrix<ElemType>::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0);
|
||||
/// add the word log posterior probability
|
||||
if (y_t < lft_bnd)
|
||||
LogicError("ClassBasedCrossEntropyWithSoftmax::EvaluateThisNodeS : the word index is smaller than its left bound of its class. This could happen because of reader issues. ");
|
||||
|
||||
size_t idx_in_class = y_t - lft_bnd;
|
||||
Matrix<ElemType>::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0);
|
||||
}
|
||||
|
||||
/// add the class log posterior probability
|
||||
try{
|
||||
Matrix<ElemType>::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0);
|
||||
}
|
||||
catch (...)
|
||||
if (curNode->MaskToZeroWhenLabelAndFeatureMissing(clsLogSoftmax, t) == false)
|
||||
{
|
||||
fprintf(stderr, "EvaluateThisNodeS for ClassBasedCrossEntropyWithSoftmaxNode : number of classes is smaller than the dimension to read. Check network builder such as nbrClass and vocabulary file with class index to see if the number of classes and the maximum class index match. The right number should be number of classes == maximum class index number + 1\n");
|
||||
throw;
|
||||
try{
|
||||
Matrix<ElemType>::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
fprintf(stderr, "EvaluateThisNodeS for ClassBasedCrossEntropyWithSoftmaxNode : number of classes is smaller than the dimension to read. Check network builder such as nbrClass and vocabulary file with class index to see if the number of classes and the maximum class index match. The right number should be number of classes == maximum class index number + 1\n");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
sz += nbr_wrd;
|
||||
}
|
||||
|
||||
|
@ -1376,6 +1385,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
reset to error signals to 0 for any elements without labels
|
||||
*/
|
||||
bool MaskToZeroWhenLabelAndFeatureMissing(Matrix<ElemType>& matrixToBeMasked, const size_t t)
|
||||
{
|
||||
bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed
|
||||
|
||||
if (m_sentenceSeg != nullptr && m_existsSentenceBeginOrNoLabels != nullptr
|
||||
&& !m_sentenceSeg->IsEmpty() && !m_existsSentenceBeginOrNoLabels->IsEmpty())
|
||||
{
|
||||
size_t nS = m_sentenceSeg->GetNumRows();
|
||||
|
||||
if (m_existsSentenceBeginOrNoLabels->GetNumRows() != 1)
|
||||
{
|
||||
LogicError("MaskToZeroWhenLabelAndFeatureMissing: m_existsSentenceBeginOrNoLabels should be a one row matrix or a vector. ");
|
||||
}
|
||||
|
||||
Matrix<ElemType> colSeg(m_sentenceSeg->GetDeviceId());
|
||||
|
||||
size_t j = t / nS;
|
||||
size_t i = t % nS;
|
||||
if (m_existsSentenceBeginOrNoLabels->ColumnSlice(j, 1).Get00Element() == EXISTS_SENTENCE_BEGIN_OR_NO_LABELS)
|
||||
{
|
||||
if ((*m_sentenceSeg)(j, i) == NO_LABELS)
|
||||
{
|
||||
matrixToBeMasked.ColumnSlice(t,1).SetValue(0);
|
||||
|
||||
processedExistsNoLabelorFeatureMissing = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return processedExistsNoLabelorFeatureMissing;
|
||||
}
|
||||
|
||||
/**
|
||||
Inputs: [0] label in dense matrix in [4 x T]
|
||||
the first row is the word index, the second row is the class index, the third row is the first word index of the class
|
||||
|
|
Загрузка…
Ссылка в новой задаче