Merge branch 'master' of https://git01.codeplex.com/cntk into amitaga/mergeHTKMLFReaders
This commit is contained in:
Коммит
8e85f07de3
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
2
Makefile
2
Makefile
|
@ -169,6 +169,7 @@ MATH_SRC =\
|
|||
Math/Math/MatrixQuantizerCPU.cpp \
|
||||
Math/Math/QuantizedMatrix.cpp \
|
||||
Math/Math/Matrix.cpp \
|
||||
Math/Math/CUDAPageLockedMemAllocator.cpp \
|
||||
|
||||
ifdef CUDA_PATH
|
||||
MATH_SRC +=\
|
||||
|
@ -176,7 +177,6 @@ MATH_SRC +=\
|
|||
Math/Math/GPUMatrixCUDAKernels.cu \
|
||||
Math/Math/GPUSparseMatrix.cu \
|
||||
Math/Math/GPUWatcher.cu \
|
||||
Math/Math/CUDAPageLockedMemAllocator.cpp \
|
||||
Math/Math/MatrixQuantizerGPU.cu \
|
||||
|
||||
else
|
||||
|
|
|
@ -152,10 +152,6 @@
|
|||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||
<FileType>CppCode</FileType>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="ValueQuantizer.cu">
|
||||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||
<FileType>CppCode</FileType>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="GPUMatrix.cu">
|
||||
|
|
|
@ -16,9 +16,6 @@
|
|||
<CudaCompile Include="MatrixQuantizerGPU.cu">
|
||||
<Filter>GPU\1bitSGD</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="ValueQuantizer.cu">
|
||||
<Filter>GPU\1bitSGD</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="MatrixQuantizer_kernel.cu">
|
||||
<Filter>GPU\1bitSGD</Filter>
|
||||
</CudaCompile>
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
#include "stdafx.h"
|
||||
#include "CUDAPageLockedMemAllocator.h"
|
||||
#ifndef CPUONLY
|
||||
#include <cuda_runtime_api.h>
|
||||
#endif // !CPUONLY
|
||||
#include "BestGpu.h"
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
#ifndef CPUONLY
|
||||
CUDAPageLockedMemAllocator::CUDAPageLockedMemAllocator(int deviceID)
|
||||
: m_deviceID(deviceID)
|
||||
{
|
||||
|
@ -25,4 +29,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
cudaSetDevice(m_deviceID);
|
||||
cudaFreeHost(p) || "Free in CUDAPageLockedMemAllocator failed";
|
||||
}
|
||||
|
||||
int CUDAPageLockedMemAllocator::GetDeviceID() const
|
||||
{
|
||||
return m_deviceID;
|
||||
}
|
||||
#else
|
||||
// Dummy definitions when compiling for CPUONLY
|
||||
CUDAPageLockedMemAllocator::CUDAPageLockedMemAllocator(int)
|
||||
{
|
||||
}
|
||||
|
||||
int CUDAPageLockedMemAllocator::GetDeviceID() const
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
char* CUDAPageLockedMemAllocator::Malloc(size_t)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void CUDAPageLockedMemAllocator::Free(char*)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
}}}
|
||||
|
|
|
@ -19,11 +19,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
public:
|
||||
CUDAPageLockedMemAllocator(int deviceID);
|
||||
|
||||
int GetDeviceID() const
|
||||
{
|
||||
return m_deviceID;
|
||||
}
|
||||
|
||||
int GetDeviceID() const;
|
||||
char* Malloc(size_t size) override;
|
||||
void Free(char* p) override;
|
||||
|
||||
|
|
9504
Math/Math/Matrix.cpp
9504
Math/Math/Matrix.cpp
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -4,8 +4,10 @@
|
|||
#include "ColumnQuantizer.h"
|
||||
#include "QuantizedMatrix.h"
|
||||
#include "GPUMatrix.h"
|
||||
#ifndef CPUONLY
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda.h>
|
||||
#endif // !CPUONLY
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
|
@ -34,6 +36,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Helper function to get a temporary intermediate matrix on the GPU to store quantization results
|
||||
QuantizedMatrix<ElemType>& GetTempGPUQuantizedMatrix(size_t nBits, bool& newlyAllocated);
|
||||
|
||||
#ifndef CPUONLY
|
||||
// Record an event to flag the completion of the quantization/unquantization kernel on the compute stream
|
||||
void RecordQuantizeCompleteEvent(cudaStream_t computestream) const;
|
||||
|
||||
|
@ -68,7 +71,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
mutable cudaEvent_t m_quantizeCompleteEvent;
|
||||
mutable cudaEvent_t m_fetchCompleteEvent;
|
||||
mutable cudaEvent_t m_assignCompleteEvent;
|
||||
#endif // !CPUONLY
|
||||
|
||||
private:
|
||||
bool m_forceSync;
|
||||
bool m_quantizeOpIncludedFetch;
|
||||
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#include <device_launch_parameters.h>
|
||||
|
||||
#include "ValueQuantizer.h"
|
||||
#include "ValueQuantizer.cu"
|
||||
#include "ColumnQuantizer.h"
|
||||
#include "QuantizedMatrix.h"
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include "GPUMatrix.h"
|
||||
#include "GPUSparseMatrix.h"
|
||||
#include "MatrixQuantizerGPU.h"
|
||||
|
||||
#pragma warning (disable: 4100) // unreferenced formal parameter, which is OK since all functions in here are dummies; disabling this allows to copy-paste prototypes here when we add new functions
|
||||
#pragma warning (disable: 4702) // unreachable code, which we get from the NOT_IMPLEMENTED macro which is OK
|
||||
|
@ -355,6 +356,7 @@ namespace Microsoft {
|
|||
|
||||
#pragma endregion Helper Functions
|
||||
|
||||
template class GPUSparseMatrix<char>;
|
||||
template class GPUSparseMatrix<float>;
|
||||
template class GPUSparseMatrix<double>;
|
||||
|
||||
|
@ -477,6 +479,7 @@ namespace Microsoft {
|
|||
|
||||
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols) { return *this; }
|
||||
|
||||
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::SetColumnSlice(const GPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols) { return *this; }
|
||||
|
||||
//for each column of a, we assign numRows starting from startIndex to this
|
||||
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
|
||||
|
@ -1082,10 +1085,50 @@ namespace Microsoft {
|
|||
}
|
||||
#pragma endregion Static BLAS Functions
|
||||
|
||||
#pragma region MatrixQuantizerGPU functions
|
||||
template<class ElemType>
|
||||
MatrixQuantizerGPU<ElemType>::MatrixQuantizerGPU(const Matrix<ElemType>& inMatrix, bool forceSync)
|
||||
: MatrixQuantizer<ElemType>(inMatrix)
|
||||
{
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
MatrixQuantizerGPU<ElemType>::~MatrixQuantizerGPU()
|
||||
{
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void MatrixQuantizerGPU<ElemType>::QuantizeAsync(QuantizedMatrix<ElemType>& outQMatrix, bool zeroThresholdFor1Bit)
|
||||
{
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void MatrixQuantizerGPU<ElemType>::WaitQuantizeAsyncDone()
|
||||
{
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void MatrixQuantizerGPU<ElemType>::UnquantizeAsync(QuantizedMatrix<ElemType>& inQMatrix, Matrix<ElemType>& outMatrix, bool add /*= false*/)
|
||||
{
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void MatrixQuantizerGPU<ElemType>::WaitUnquantizeAsyncDone()
|
||||
{
|
||||
}
|
||||
#pragma endregion MatrixQuantizerGPU functions
|
||||
|
||||
template class GPUMatrix<char>;
|
||||
template class GPUMatrix<float>;
|
||||
template class GPUMatrix<double>;
|
||||
template class DeviceBoundNumber<float>;
|
||||
template class DeviceBoundNumber<double>;
|
||||
template MatrixQuantizerGPU<float>::MatrixQuantizerGPU(const Matrix<float>&, bool forceSync);
|
||||
template MatrixQuantizerGPU<double>::MatrixQuantizerGPU(const Matrix<double>&, bool forceSync);
|
||||
template MatrixQuantizerGPU<float>::~MatrixQuantizerGPU();
|
||||
template MatrixQuantizerGPU<double>::~MatrixQuantizerGPU();
|
||||
template void MatrixQuantizerGPU<float>::QuantizeAsync(QuantizedMatrix<float>&, bool);
|
||||
template void MatrixQuantizerGPU<double>::QuantizeAsync(QuantizedMatrix<double>&, bool);
|
||||
|
||||
template<class ElemType> cublasHandle_t GPUMatrix<ElemType>::s_cuHandle[GPUMatrix<ElemType>::MaxGpus] = { 0 };
|
||||
|
||||
|
|
|
@ -1,153 +0,0 @@
|
|||
#ifndef __VALLUE_QUANTIZER_CUH__
|
||||
#define __VALLUE_QUANTIZER_CUH__
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "ValueQuantizer.h"
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
template<class ElemType>
|
||||
cudasharedcode
|
||||
ValueQuantizer<ElemType>::ValueQuantizer(size_t ldNbits, ElemType lower, ElemType upper)
|
||||
: ldNbits(ldNbits), Nbits(1 << ldNbits), quantimin(lower), quantimax(upper)
|
||||
{
|
||||
rangeend = ((QWordVal)1) << Nbits;
|
||||
|
||||
// post-fix for incorrect shift for no-quant hack (Nbits=32): << arg is taken mod 32!
|
||||
// in this case, it's only used as (rangeend-1) which is now correct (before it was 0!)
|
||||
if (Nbits >= (8 * sizeof(rangeend)))
|
||||
{
|
||||
rangeend = 0;
|
||||
}
|
||||
|
||||
// must protect against NaN: interval is 0 -> quantization is futile, just emit 0
|
||||
if (((quantimax - quantimin) < 1e-36f) || (rangeend == 0))
|
||||
{
|
||||
qfactor = ufactor = (ElemType)0.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// precompute this for quantize() (see comment there)
|
||||
qfactor = rangeend / (quantimax - quantimin);
|
||||
// and for unquantize()
|
||||
ufactor = (quantimax - quantimin) / rangeend;
|
||||
}
|
||||
|
||||
// set the quantization threshold for the special case of 1-bit
|
||||
quantimid = 0.5f * (quantimax + quantimin);
|
||||
}
|
||||
|
||||
// quantize for the full-ElemType-size bits case (special case that allows bypassing quantization, for testing/debugging purposes)
|
||||
template<class ElemType>
|
||||
cudasharedcode ValueQuantizer<ElemType>::QWordVal
|
||||
ValueQuantizer<ElemType>::QuantizeToFullQWord(ElemType u) const
|
||||
{
|
||||
assert(Nbits == QWordNumBits);
|
||||
|
||||
// we return the bit pattern that encodes the float value
|
||||
return *(QWordVal*)&u;
|
||||
}
|
||||
|
||||
// quantize one value --special version for 1 bit
|
||||
template<class ElemType>
|
||||
template<bool ZeroThresholdFor1Bit>
|
||||
cudasharedcode bool
|
||||
ValueQuantizer<ElemType>::Quantize1(ElemType u) const
|
||||
{
|
||||
assert (Nbits == 1);
|
||||
if (!ZeroThresholdFor1Bit)
|
||||
{
|
||||
return u >= quantimid;
|
||||
}
|
||||
else
|
||||
{
|
||||
return u >= (ElemType)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
// quantize one value
|
||||
// TODO: we can optimize for 1 bit here - very simply use a template arg 'isonebit'
|
||||
template<class ElemType>
|
||||
template<bool ZeroThresholdFor1Bit>
|
||||
cudasharedcode ValueQuantizer<ElemType>::QWordVal
|
||||
ValueQuantizer<ElemType>::Quantize(ElemType u) const
|
||||
{
|
||||
if (Nbits == QWordNumBits)
|
||||
{
|
||||
return QuantizeToFullQWord(u);
|
||||
}
|
||||
// TODO: we may need to optimize this by a template arg
|
||||
else if (ldNbits == 0)
|
||||
{
|
||||
return Quantize1<ZeroThresholdFor1Bit>(u) ? 1 : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (u <= quantimin)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else if (u >= quantimax)
|
||||
{
|
||||
return (rangeend - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
return (QWordVal)((QWordValSigned)((u - quantimin) * qfactor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unquantize one value
|
||||
template<class ElemType>
|
||||
cudasharedcode
|
||||
ElemType ValueQuantizer<ElemType>::Unquantize(QWordVal u) const
|
||||
{
|
||||
if (Nbits == QWordNumBits)
|
||||
{
|
||||
return *(ElemType*)&u;
|
||||
}
|
||||
|
||||
// Note: in 1-bit case, we want 0.5 -> mean0, 1.5 -> mean1
|
||||
return ((u + (ElemType)0.5) * ufactor) + quantimin;
|
||||
}
|
||||
|
||||
// unquantize one value --special case for 1 bit
|
||||
template<class ElemType>
|
||||
cudasharedcode
|
||||
ElemType ValueQuantizer<ElemType>::Unquantize1(bool u, ElemType val0, ElemType val1)
|
||||
{
|
||||
return u ? val1 : val0;
|
||||
}
|
||||
|
||||
// helper: compute the binary log of a power of two (utility function to convert 'Nbits' into 'ldNbits')
|
||||
template<class ElemType>
|
||||
size_t ValueQuantizer<ElemType>::ld(size_t v)
|
||||
{
|
||||
if (v == 1)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else if (v & 1) // not a power of two
|
||||
{
|
||||
throw std::runtime_error("ld: 'bits' must be a power of two");
|
||||
}
|
||||
else
|
||||
{
|
||||
return 1 + ld (v >> 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit instantiation
|
||||
template class ValueQuantizer<float>;
|
||||
template class ValueQuantizer<double>;
|
||||
template ValueQuantizer<float>::QWordVal ValueQuantizer<float>::Quantize<true>(float u) const;
|
||||
template ValueQuantizer<float>::QWordVal ValueQuantizer<float>::Quantize<false>(float u) const;
|
||||
template ValueQuantizer<double>::QWordVal ValueQuantizer<double>::Quantize<true>(double u) const;
|
||||
template ValueQuantizer<double>::QWordVal ValueQuantizer<double>::Quantize<false>(double u) const;
|
||||
template bool ValueQuantizer<float>::Quantize1<true>(float u) const;
|
||||
template bool ValueQuantizer<float>::Quantize1<false>(float u) const;
|
||||
template bool ValueQuantizer<double>::Quantize1<true>(double u) const;
|
||||
template bool ValueQuantizer<double>::Quantize1<false>(double u) const;
|
||||
}}}
|
||||
#endif
|
|
@ -2,10 +2,17 @@
|
|||
#ifndef __VALLUE_QUANTIZER_H__
|
||||
#define __VALLUE_QUANTIZER_H__
|
||||
|
||||
#ifndef CPUONLY
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <device_launch_parameters.h>
|
||||
#endif // !CPUONLY
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
|
||||
#pragma warning (disable: 4127) // conditional expression is constant
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -55,17 +62,98 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
static const size_t QWordNumBits = 8 * sizeof(QWord);
|
||||
|
||||
public:
|
||||
cudasharedcode ValueQuantizer(size_t ldNbits, ElemType lower, ElemType upper);
|
||||
cudasharedcode ValueQuantizer(size_t ldNbits, ElemType lower, ElemType upper)
|
||||
: ldNbits(ldNbits), Nbits(1 << ldNbits), quantimin(lower), quantimax(upper)
|
||||
{
|
||||
rangeend = ((QWordVal)1) << Nbits;
|
||||
|
||||
// post-fix for incorrect shift for no-quant hack (Nbits=32): << arg is taken mod 32!
|
||||
// in this case, it's only used as (rangeend-1) which is now correct (before it was 0!)
|
||||
if (Nbits >= (8 * sizeof(rangeend)))
|
||||
{
|
||||
rangeend = 0;
|
||||
}
|
||||
|
||||
// must protect against NaN: interval is 0 -> quantization is futile, just emit 0
|
||||
if (((quantimax - quantimin) < 1e-36f) || (rangeend == 0))
|
||||
{
|
||||
qfactor = ufactor = (ElemType)0.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// precompute this for quantize() (see comment there)
|
||||
qfactor = rangeend / (quantimax - quantimin);
|
||||
// and for unquantize()
|
||||
ufactor = (quantimax - quantimin) / rangeend;
|
||||
}
|
||||
|
||||
// set the quantization threshold for the special case of 1-bit
|
||||
quantimid = 0.5f * (quantimax + quantimin);
|
||||
}
|
||||
|
||||
// quantize one value
|
||||
// TODO: we can optimize for 1 bit here - very simply use a template arg 'isonebit'
|
||||
template<bool ZeroThresholdFor1Bit>
|
||||
cudasharedcode QWordVal Quantize(ElemType u) const;
|
||||
cudasharedcode QWordVal Quantize(ElemType u) const
|
||||
{
|
||||
if (Nbits == QWordNumBits)
|
||||
{
|
||||
return QuantizeToFullQWord(u);
|
||||
}
|
||||
// TODO: we may need to optimize this by a template arg
|
||||
else if (ldNbits == 0)
|
||||
{
|
||||
return Quantize1<ZeroThresholdFor1Bit>(u) ? 1 : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (u <= quantimin)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else if (u >= quantimax)
|
||||
{
|
||||
return (rangeend - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
return (QWordVal)((QWordValSigned)((u - quantimin) * qfactor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudasharedcode ElemType Unquantize(QWordVal u) const;
|
||||
// unquantize one value
|
||||
cudasharedcode ElemType Unquantize(QWordVal u) const
|
||||
{
|
||||
if (Nbits == QWordNumBits)
|
||||
{
|
||||
return *(ElemType*)&u;
|
||||
}
|
||||
|
||||
// Note: in 1-bit case, we want 0.5 -> mean0, 1.5 -> mean1
|
||||
return ((u + (ElemType)0.5) * ufactor) + quantimin;
|
||||
}
|
||||
|
||||
// quantize one value --special version for 1 bit
|
||||
template<bool ZeroThresholdFor1Bit>
|
||||
cudasharedcode bool Quantize1(ElemType u) const;
|
||||
cudasharedcode bool Quantize1(ElemType u) const
|
||||
{
|
||||
assert(Nbits == 1);
|
||||
if (!ZeroThresholdFor1Bit)
|
||||
{
|
||||
return u >= quantimid;
|
||||
}
|
||||
else
|
||||
{
|
||||
return u >= (ElemType)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
static cudasharedcode ElemType Unquantize1(bool u, ElemType val0, ElemType val1);
|
||||
// unquantize one value --special case for 1 bit
|
||||
static cudasharedcode ElemType Unquantize1(bool u, ElemType val0, ElemType val1)
|
||||
{
|
||||
return u ? val1 : val0;
|
||||
}
|
||||
|
||||
// how many bits we are quantizing to
|
||||
cudasharedcode size_t NBits() const
|
||||
|
@ -79,10 +167,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return rangeend;
|
||||
}
|
||||
|
||||
static size_t ld(size_t v);
|
||||
|
||||
protected:
|
||||
cudasharedcode QWordVal QuantizeToFullQWord(ElemType u) const;
|
||||
// helper: compute the binary log of a power of two (utility function to convert 'Nbits' into 'ldNbits')
|
||||
static size_t ld(size_t v)
|
||||
{
|
||||
if (v == 1)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else if (v & 1) // not a power of two
|
||||
{
|
||||
throw std::runtime_error("ld: 'bits' must be a power of two");
|
||||
}
|
||||
else
|
||||
{
|
||||
return 1 + ld(v >> 1);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// quantize for the full-ElemType-size bits case (special case that allows bypassing quantization, for testing/debugging purposes)
|
||||
cudasharedcode QWordVal QuantizeToFullQWord(ElemType u) const
|
||||
{
|
||||
assert(Nbits == QWordNumBits);
|
||||
|
||||
// we return the bit pattern that encodes the float value
|
||||
return *(QWordVal*)&u;
|
||||
}
|
||||
|
||||
protected:
|
||||
// NBits must be a power of two
|
||||
|
@ -105,4 +215,4 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ElemType ufactor;
|
||||
};
|
||||
}}}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
dataDir: ../Data
|
||||
|
||||
testCases:
|
||||
CNTK Run must be completed:
|
||||
patterns:
|
||||
- ^COMPLETED
|
||||
|
||||
Must train epochs in exactly same order and parameters:
|
||||
patterns:
|
||||
- ^Starting Epoch {{integer}}
|
||||
- learning rate per sample = {{float}}
|
||||
- momentum = {{float}}
|
||||
|
||||
Epochs must be finished with expected results:
|
||||
patterns:
|
||||
- ^Finished Epoch[{{integer}}]
|
||||
- TrainLossPerSample = {{float,tolerance=1%}}
|
||||
- EvalErrPerSample = {{float,tolerance=1%}}
|
||||
- Ave LearnRatePerSample = {{float,tolerance=1%}}
|
||||
|
||||
Per-minibatch training results must match:
|
||||
patterns:
|
||||
- ^ Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} of {{integer}}]
|
||||
- SamplesSeen = {{integer}}
|
||||
- TrainLossPerSample = {{float,tolerance=1%}}
|
||||
- EvalErr[0]PerSample = {{float,tolerance=1%}}
|
||||
|
Загрузка…
Ссылка в новой задаче