* The issue was that DoSqrOfDifferenceOf(beta, input, mean, alpha) assigns the mean value to the gaps in the input; these values are then included in the reduction performed inside the same call, leading to incorrect results. The fix is to execute the assign and the reduce separately, and to mask the gaps back to zero before reducing (a standalone sketch of the pattern follows below).
* Update the test baselines affected by this change (err is lowered by <1%).
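For illustration, here is a minimal standalone sketch of the assign -> mask -> reduce pattern the fix introduces, using plain std::vector in place of CNTK's Matrix/TensorView types; the data, loop structure, and variable names below are illustrative only, not the actual implementation.

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
    // One feature row with 4 minibatch columns; column 3 is a gap (no valid frame).
    std::vector<float> input = {1.0f, 2.0f, 3.0f, 0.0f};
    std::vector<bool>  isGap = {false, false, false, true};
    float mean  = 2.0f;
    float var   = 0.0f;                  // running accumulator, analogous to m_var
    float beta  = 0.0f, alpha = 1.0f;    // blend factors for the running estimate

    // Step 1: element-wise (input - mean)^2 into a temp buffer, no reduction yet
    // (plays the role of inputTemp.AssignSqrOfDifferenceOf(input, mean, 1.0f)).
    std::vector<float> temp(input.size());
    for (std::size_t i = 0; i < input.size(); ++i)
        temp[i] = (input[i] - mean) * (input[i] - mean);

    // Step 2: re-mask gap columns to zero so they contribute nothing to the sum
    // (plays the role of MaskMissingColumnsToZero on m_inputTemp).
    for (std::size_t i = 0; i < temp.size(); ++i)
        if (isGap[i]) temp[i] = 0.0f;

    // Step 3: reduce the masked buffer into the accumulator
    // (plays the role of var.DoCopyOf(beta, inputTemp, alpha)).
    float sum = 0.0f;
    for (float v : temp) sum += v;
    var = beta * var + alpha * sum;

    std::printf("var accumulator = %f\n", var); // 2.0: only the 3 valid columns contribute
    return 0;
}

Fusing steps 1 and 3 into a single reducing call, as the removed DoSqrOfDifferenceOf path did, leaves no point at which the gap columns can be re-masked, which is what produced the incorrect statistics.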
Bowen Bao 2018-10-18 15:23:03 -07:00
Parent 0ffdcf7f1d
Commit a55e871ec8
3 changed files with 17 additions and 10 deletions

View file

@@ -270,7 +270,8 @@ public:
: Base(deviceId, name),
m_mean(make_shared<Matrix<ElemType>>(deviceId)),
m_var (make_shared<Matrix<ElemType>>(deviceId)),
- m_temp(make_shared<Matrix<ElemType>>(deviceId))
+ m_temp(make_shared<Matrix<ElemType>>(deviceId)),
+ m_inputTemp(make_shared<Matrix<ElemType>>(deviceId))
{
}
@@ -311,6 +312,8 @@ public:
// set gaps to zero, since we are reducing in time
InputRef(0).MaskMissingValueColumnsToZero(fr);
+ m_inputTemp->Resize(InputRef(0).Value()); // same size as input
+ m_inputTemp->SetValue(0);
size_t numNewSamples = InputRef(0).GetMBLayout()->GetActualNumSamples();
size_t totalNumSamples = m_numSamples + numNewSamples;
@@ -320,10 +323,11 @@ public:
ElemType beta = (ElemType)m_numSamples / totalNumSamples;
size_t rank = DetermineElementwiseTensorRank();
- auto input = InputRef(0).ValueTensorFor( rank, fr);
- auto mean = DataTensorFor(m_mean, rank, FrameRange());
- auto temp = DataTensorFor(m_temp, rank, FrameRange());
- auto var = DataTensorFor(m_var, rank, FrameRange());
+ auto input = InputRef(0).ValueTensorFor( rank, fr);
+ auto mean = DataTensorFor(m_mean, rank, FrameRange());
+ auto temp = DataTensorFor(m_temp, rank, FrameRange());
+ auto var = DataTensorFor(m_var, rank, FrameRange());
+ auto inputTemp = TensorView<ElemType>(m_inputTemp, InputRef(0).GetTensorSliceFor(rank, fr));
// preserve the old mean value for the next step
temp.AssignCopyOf(mean);
@@ -337,7 +341,9 @@ public:
var.AddSqrOf(temp); // add the square
// var += (input - mean)^2
- var.DoSqrOfDifferenceOf(beta, input, mean, alpha); // this reduces as well
+ inputTemp.AssignSqrOfDifferenceOf(input, mean, 1.0f); // this doesn't reduce yet.
+ MaskMissingColumnsToZero(*m_inputTemp, InputRef(0).GetMBLayout(), fr); // set gaps to zero.
+ var.DoCopyOf(beta, inputTemp, alpha); // now reduce
m_numSamples += InputRef(0).GetMBLayout()->GetActualNumSamples();
}
@@ -358,6 +364,7 @@ private:
shared_ptr<Matrix<ElemType>> m_mean;
shared_ptr<Matrix<ElemType>> m_var;
shared_ptr<Matrix<ElemType>> m_temp;
+ shared_ptr<Matrix<ElemType>> m_inputTemp;
};
template class InvStdDevNode<float>;

View file

@@ -1521,8 +1521,8 @@ minibatchiterator: epoch 0: frames [0..2560] (first utterance at frame 0), data
12/15/2016 08:46:23: Epoch[ 1 of 2]-Minibatch[ 1- 1, 0.78%]: ce = 4.88277172 * 886; err = 0.99548533 * 886; time = 4.1250s; samplesPerSecond = 214.8
12/15/2016 08:46:24: Epoch[ 1 of 2]-Minibatch[ 2- 2, 1.56%]: ce = 4.06417226 * 226; err = 0.76106195 * 226; time = 1.2433s; samplesPerSecond = 181.8
12/15/2016 08:46:27: Epoch[ 1 of 2]-Minibatch[ 3- 3, 2.34%]: ce = 3.97176231 * 526; err = 0.82889734 * 526; time = 2.4236s; samplesPerSecond = 217.0
- 12/15/2016 08:46:33: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82720986 * 946; err = 0.91014799 * 946; time = 6.1612s; samplesPerSecond = 153.5
- 12/15/2016 08:46:33: Finished Epoch[ 1 of 2]: [Training] ce = 4.60538939 * 2584; err = 0.90982972 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=13.9556s
+ 12/15/2016 08:46:33: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82879600 * 946; err = 0.90697674 * 946; time = 6.1612s; samplesPerSecond = 153.5
+ 12/15/2016 08:46:33: Finished Epoch[ 1 of 2]: [Training] ce = 4.60596459 * 2584; err = 0.90866873 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=13.9556s
12/15/2016 08:46:33: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082658.690476\Speech\LSTM_FullUtterance@release_cpu/models/cntkSpeech.dnn.1'
12/15/2016 08:46:33: Starting Epoch 2: learning rate per sample = 0.025000 effective momentum = 0.899988 momentum as time constant = 189.8 samples

View file

@@ -1521,8 +1521,8 @@ minibatchiterator: epoch 0: frames [0..2560] (first utterance at frame 0), data
12/15/2016 08:47:25: Epoch[ 1 of 2]-Minibatch[ 1- 1, 0.78%]: ce = 4.88277172 * 886; err = 0.99548533 * 886; time = 3.6171s; samplesPerSecond = 245.0
12/15/2016 08:47:26: Epoch[ 1 of 2]-Minibatch[ 2- 2, 1.56%]: ce = 4.06417226 * 226; err = 0.76106195 * 226; time = 1.2956s; samplesPerSecond = 174.4
12/15/2016 08:47:29: Epoch[ 1 of 2]-Minibatch[ 3- 3, 2.34%]: ce = 3.97176323 * 526; err = 0.82889734 * 526; time = 2.4769s; samplesPerSecond = 212.4
- 12/15/2016 08:47:34: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82721244 * 946; err = 0.91014799 * 946; time = 5.5012s; samplesPerSecond = 172.0
- 12/15/2016 08:47:34: Finished Epoch[ 1 of 2]: [Training] ce = 4.60539052 * 2584; err = 0.90982972 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=12.8927s
+ 12/15/2016 08:47:34: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82879652 * 946; err = 0.90697674 * 946; time = 5.5012s; samplesPerSecond = 172.0
+ 12/15/2016 08:47:34: Finished Epoch[ 1 of 2]: [Training] ce = 4.60596459 * 2584; err = 0.90866873 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=12.8927s
12/15/2016 08:47:34: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082658.690476\Speech\LSTM_FullUtterance@release_gpu/models/cntkSpeech.dnn.1'
12/15/2016 08:47:35: Starting Epoch 2: learning rate per sample = 0.025000 effective momentum = 0.899988 momentum as time constant = 189.8 samples