Fix InvStdDev.
* The issue was that AssignSqrOfDifferenceOf(beta, input, mean, alpha) writes the mean value into the gap columns of the input. These gap values are then included in the reduction performed inside that call, leading to incorrect results. The fix is to execute the assign and the reduce as separate steps, masking the gaps back to zero before reducing. * Update the test baseline affected by this change (err is lowered by <1%).
This commit is contained in:
Parent
0ffdcf7f1d
Commit
a55e871ec8
|
@ -270,7 +270,8 @@ public:
|
|||
: Base(deviceId, name),
|
||||
m_mean(make_shared<Matrix<ElemType>>(deviceId)),
|
||||
m_var (make_shared<Matrix<ElemType>>(deviceId)),
|
||||
m_temp(make_shared<Matrix<ElemType>>(deviceId))
|
||||
m_temp(make_shared<Matrix<ElemType>>(deviceId)),
|
||||
m_inputTemp(make_shared<Matrix<ElemType>>(deviceId))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -311,6 +312,8 @@ public:
|
|||
|
||||
// set gaps to zero, since we are reducing in time
|
||||
InputRef(0).MaskMissingValueColumnsToZero(fr);
|
||||
m_inputTemp->Resize(InputRef(0).Value()); // same size of input
|
||||
m_inputTemp->SetValue(0);
|
||||
|
||||
size_t numNewSamples = InputRef(0).GetMBLayout()->GetActualNumSamples();
|
||||
size_t totalNumSamples = m_numSamples + numNewSamples;
|
||||
|
@ -320,10 +323,11 @@ public:
|
|||
ElemType beta = (ElemType)m_numSamples / totalNumSamples;
|
||||
|
||||
size_t rank = DetermineElementwiseTensorRank();
|
||||
auto input = InputRef(0).ValueTensorFor( rank, fr);
|
||||
auto mean = DataTensorFor(m_mean, rank, FrameRange());
|
||||
auto temp = DataTensorFor(m_temp, rank, FrameRange());
|
||||
auto var = DataTensorFor(m_var, rank, FrameRange());
|
||||
auto input = InputRef(0).ValueTensorFor( rank, fr);
|
||||
auto mean = DataTensorFor(m_mean, rank, FrameRange());
|
||||
auto temp = DataTensorFor(m_temp, rank, FrameRange());
|
||||
auto var = DataTensorFor(m_var, rank, FrameRange());
|
||||
auto inputTemp = TensorView<ElemType>(m_inputTemp, InputRef(0).GetTensorSliceFor(rank, fr));
|
||||
|
||||
// preserve the old mean value for the next step
|
||||
temp.AssignCopyOf(mean);
|
||||
|
@ -337,7 +341,9 @@ public:
|
|||
var.AddSqrOf(temp); // add the square
|
||||
|
||||
// var += (input - mean)^2
|
||||
var.DoSqrOfDifferenceOf(beta, input, mean, alpha); // this reduces as well
|
||||
inputTemp.AssignSqrOfDifferenceOf(input, mean, 1.0f); // this doesn't reduce yet.
|
||||
MaskMissingColumnsToZero(*m_inputTemp, InputRef(0).GetMBLayout(), fr); // set gaps to zero.
|
||||
var.DoCopyOf(beta, inputTemp, alpha); // now reduce
|
||||
|
||||
m_numSamples += InputRef(0).GetMBLayout()->GetActualNumSamples();
|
||||
}
|
||||
|
@ -358,6 +364,7 @@ private:
|
|||
shared_ptr<Matrix<ElemType>> m_mean;
|
||||
shared_ptr<Matrix<ElemType>> m_var;
|
||||
shared_ptr<Matrix<ElemType>> m_temp;
|
||||
shared_ptr<Matrix<ElemType>> m_inputTemp;
|
||||
};
|
||||
|
||||
template class InvStdDevNode<float>;
|
||||
|
|
|
@ -1521,8 +1521,8 @@ minibatchiterator: epoch 0: frames [0..2560] (first utterance at frame 0), data
|
|||
12/15/2016 08:46:23: Epoch[ 1 of 2]-Minibatch[ 1- 1, 0.78%]: ce = 4.88277172 * 886; err = 0.99548533 * 886; time = 4.1250s; samplesPerSecond = 214.8
|
||||
12/15/2016 08:46:24: Epoch[ 1 of 2]-Minibatch[ 2- 2, 1.56%]: ce = 4.06417226 * 226; err = 0.76106195 * 226; time = 1.2433s; samplesPerSecond = 181.8
|
||||
12/15/2016 08:46:27: Epoch[ 1 of 2]-Minibatch[ 3- 3, 2.34%]: ce = 3.97176231 * 526; err = 0.82889734 * 526; time = 2.4236s; samplesPerSecond = 217.0
|
||||
12/15/2016 08:46:33: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82720986 * 946; err = 0.91014799 * 946; time = 6.1612s; samplesPerSecond = 153.5
|
||||
12/15/2016 08:46:33: Finished Epoch[ 1 of 2]: [Training] ce = 4.60538939 * 2584; err = 0.90982972 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=13.9556s
|
||||
12/15/2016 08:46:33: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82879600 * 946; err = 0.90697674 * 946; time = 6.1612s; samplesPerSecond = 153.5
|
||||
12/15/2016 08:46:33: Finished Epoch[ 1 of 2]: [Training] ce = 4.60596459 * 2584; err = 0.90866873 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=13.9556s
|
||||
12/15/2016 08:46:33: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082658.690476\Speech\LSTM_FullUtterance@release_cpu/models/cntkSpeech.dnn.1'
|
||||
|
||||
12/15/2016 08:46:33: Starting Epoch 2: learning rate per sample = 0.025000 effective momentum = 0.899988 momentum as time constant = 189.8 samples
|
||||
|
|
|
@ -1521,8 +1521,8 @@ minibatchiterator: epoch 0: frames [0..2560] (first utterance at frame 0), data
|
|||
12/15/2016 08:47:25: Epoch[ 1 of 2]-Minibatch[ 1- 1, 0.78%]: ce = 4.88277172 * 886; err = 0.99548533 * 886; time = 3.6171s; samplesPerSecond = 245.0
|
||||
12/15/2016 08:47:26: Epoch[ 1 of 2]-Minibatch[ 2- 2, 1.56%]: ce = 4.06417226 * 226; err = 0.76106195 * 226; time = 1.2956s; samplesPerSecond = 174.4
|
||||
12/15/2016 08:47:29: Epoch[ 1 of 2]-Minibatch[ 3- 3, 2.34%]: ce = 3.97176323 * 526; err = 0.82889734 * 526; time = 2.4769s; samplesPerSecond = 212.4
|
||||
12/15/2016 08:47:34: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82721244 * 946; err = 0.91014799 * 946; time = 5.5012s; samplesPerSecond = 172.0
|
||||
12/15/2016 08:47:34: Finished Epoch[ 1 of 2]: [Training] ce = 4.60539052 * 2584; err = 0.90982972 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=12.8927s
|
||||
12/15/2016 08:47:34: Epoch[ 1 of 2]-Minibatch[ 4- 4, 3.13%]: ce = 4.82879652 * 946; err = 0.90697674 * 946; time = 5.5012s; samplesPerSecond = 172.0
|
||||
12/15/2016 08:47:34: Finished Epoch[ 1 of 2]: [Training] ce = 4.60596459 * 2584; err = 0.90866873 * 2584; totalSamplesSeen = 2584; learningRatePerSample = 0.025; epochTime=12.8927s
|
||||
12/15/2016 08:47:34: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082658.690476\Speech\LSTM_FullUtterance@release_gpu/models/cntkSpeech.dnn.1'
|
||||
|
||||
12/15/2016 08:47:35: Starting Epoch 2: learning rate per sample = 0.025000 effective momentum = 0.899988 momentum as time constant = 189.8 samples
|
||||
|
|
Loading…
Reference in a new issue