Merge branch 'master' of https://git.codeplex.com/cntk into fseide/reshaping

This commit is contained in:
Commit 5f7e6cfbea

@@ -22,6 +22,7 @@ printValues=true
 devtest=[action=devtest]
 
 train=[
     modelPath=$ExpFolder$\modelRnnCNTK
     action=train
     minibatchSize=10
     traceLevel=1

@@ -67,7 +68,7 @@ numMBsToShowResult=2000
     # gradUpdateType=AdaGrad
     gradUpdateType=None
 
     modelPath=$ExpFolder$\modelRnnCNTK
 
     loadBestModel=true
 
     # settings for Auto Adjust Learning Rate

@@ -1102,7 +1102,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         pLeft->FunctionValues() = redU;
         pRight->FunctionValues() = redVT;
 
-        shared_ptr<ComputationNode<ElemType>> pTimes = AddNodeToNetAndAttachInputs(New<TimesNode<ElemType>>(m_deviceId, name + L"-SVD", true /*createOutputMatrix*/), pLeft, pRight);
+        shared_ptr<ComputationNode<ElemType>> pTimes = AddNodeToNetAndAttachInputs(New<TimesNode<ElemType>>(m_deviceId, name + L"-SVD"), pLeft, pRight);
 
         //========================================
         // Step 3. remove old node

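The hunk above rejoins the two SVD factors with a plain TimesNode: redU and redVT come from the truncated decomposition of the original weight matrix. A self-contained sketch of why the rewrite pays off — a dense m x n matrix W is approximated by pLeft (m x k) times pRight (k x n), cutting the parameter count from m*n to k*(m + n). The concrete sizes are made up for illustration, not taken from this commit:

#include <cstddef>
#include <cstdio>

int main()
{
    const std::size_t m = 2048, n = 2048, k = 256;
    std::printf("dense W      : %zu parameters\n", m * n);       // 4,194,304
    std::printf("pLeft*pRight : %zu parameters\n", k * (m + n)); // 1,048,576
    return 0;
}
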
@@ -899,7 +899,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (m_gradientValues != nullptr && m_gradientValues->GetMatrixType() != SPARSE) //since we don't have a sparse pool yet
                 ReleaseMatrixToPool(m_gradientValues, matrixPool);
 
-            ReleaseMatrixToPool(m_functionValues, matrixPool);
+            if (m_functionValues->GetMatrixType() != SPARSE)
+                ReleaseMatrixToPool(m_functionValues, matrixPool);
         }
     }
 
     virtual void DumpNodeInfo(const bool /*printValues*/, File& fstream) const;

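The guard added above is the whole pooling contract for node buffers: dense matrices go back to the pool for reuse, sparse ones do not, because only dense buffers are pooled. A self-contained sketch of the idiom with stand-in types (not CNTK's real Matrix/MatrixPool):

#include <memory>

enum class MatType { DENSE, SPARSE };

struct MiniMatrix { MatType type = MatType::DENSE; };

struct MiniPool
{
    // Stand-in for pooling: take ownership of the released buffer.
    void Release(std::shared_ptr<MiniMatrix>& m) { m.reset(); }
};

// Release a matrix back to the pool only if it exists and is dense;
// sparse buffers stay put because there is no sparse pool.
void ReleaseIfDense(std::shared_ptr<MiniMatrix>& mat, MiniPool& pool)
{
    if (mat != nullptr && mat->type != MatType::SPARSE)
        pool.Release(mat);
}
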
@@ -950,6 +951,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         auto f_numCols = m_functionValues->GetNumCols();
         if (f_numRows != m_numRows || f_numCols != m_numCols)
             LogicError("UpdateFunctionMBSize: m_functionValues out of sync with m_numRows/m_numCols");
+
+#ifdef SHOW_MATRIX_TYPE
+        fprintf(stderr, "MatrixType %ls: %ls(%ls %ls)\n",
+                NodeName().c_str(),
+                OperationName().c_str(),
+                FunctionValues().GetMatrixType() == MatrixType::DENSE ? L"Dense" : L"Sparse",
+                FunctionValues().GetCurrentMatrixLocation() == GPU ? L"GPU" :
+                FunctionValues().GetCurrentMatrixLocation() == CPU ? L"CPU" : L"BOTH");
+#endif
     }
 
     void ValidateInferChildDims(size_t i, size_t rows, size_t cols) override final;

@@ -1047,9 +1057,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         const Matrix<ElemType>& FunctionValues() const { return *m_functionValues; }
         Matrix<ElemType>& FunctionValues() { return *m_functionValues; }
+        shared_ptr<Matrix<ElemType>>& FunctionValuesPtr() { return m_functionValues; }
 
         const Matrix<ElemType>& GradientValues() const { return *m_gradientValues; }
         Matrix<ElemType>& GradientValues() { return *m_gradientValues; }
+        shared_ptr<Matrix<ElemType>>& GradientValuesPtr() { return m_gradientValues; }
 
         // function to access any input and output, value and gradient, whole batch or single frame
         // Note: This returns a reference into 'data' in the form of a column slice, i.e. a small matrix object that just points into 'data'.

@@ -1291,6 +1303,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             matrixPtr = make_shared<Matrix<ElemType>>(m_deviceId);
         }
     }
 
     //to be called by derived classes if that class needs to print node values
     void PrintNodeValuesToFile(const bool printValues, File& fstream) const
     {

@@ -1479,7 +1492,7 @@ protected: \
     using Base::SetDims; /*using Base::NotifyFunctionValuesMBSizeModified;*/ using Base::GetNumRows; using Base::GetNumCols; using Base::UpdateFunctionValuesSize; using Base::LoadFunctionValues; \
     using Base::m_pMBLayout; using Base::GetNumTimeSteps; using Base::GetNumParallelSequences; \
     using Base::MaskMissingColumnsToZero; using Base::MaskMissingValuesColumnsToZero; using Base::MaskMissingGradientColumnsToZero; using Base::InvalidateMissingValuesColumns; using Base::InvalidateMissingGradientColumns; \
-    using Base::DataSlice; using Base::ValueSlice; using Base::GradientValues; using Base::GradientSlice; using Base::MaskedValueSlice; using Base::MaskedGradientSlice; \
+    using Base::DataSlice; using Base::ValueSlice; using Base::GradientValues; using Base::GradientValuesPtr; using Base::GradientSlice; using Base::MaskedValueSlice; using Base::MaskedGradientSlice; \
     using Base::EvaluateThisNode; using Base::ComputeInputPartial; \
     using Base::m_children; using Base::m_deviceId; using Base::m_functionValues; using Base::m_gradientValues; \
     using Base::m_inputImageLayout; using Base::m_imageLayout; \

@@ -1503,7 +1516,7 @@ protected: \
 public: \
     using Base::RequiresPreCompute; \
     using Base::AttachInputs; using Base::NodeName; \
-    using Base::FunctionValues
+    using Base::FunctionValues; using Base::FunctionValuesPtr
 
 #define ComputationNodeBoilerplate \
 protected: /* some boilerplate goes here */ \

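These macros re-introduce every inherited member by name because of C++ two-phase lookup: inside a class template, unqualified names from a dependent base class are not found, so each one must be pulled in with a using-declaration (which is why adding FunctionValuesPtr/GradientValuesPtr to the base also means adding them here). A minimal illustration:

template <class T>
struct Base { void FunctionValues() {} };

template <class T>
struct Node : Base<T>
{
    using Base<T>::FunctionValues; // without this, the call below fails to compile
    void Run() { FunctionValues(); }
};
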
@@ -392,18 +392,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         static const std::wstring TypeName() { return L"Times"; }
     public:
 
-        // TODO: The createOutputMatrix parameter here is temporarily added to allow creating the function values
-        // matrix for the times node added during SVD decomposition. Since ValidateSubNetwork is called after addition
-        // of the times node, the validation crashes if the function values matrix has not yet been allocated
-        // This can be removed after the Validation has been fixed to not access the function values matrix at all
         DeclareConstructorFromConfigWithNumInputs(TimesNode);
-        TimesNode(DEVICEID_TYPE deviceId, const wstring & name, bool createOutputMatrix = false) :
+        TimesNode(DEVICEID_TYPE deviceId, const wstring & name) :
             Base(deviceId, name)
         {
-            if (createOutputMatrix)
-            {
-                CreateMatrixIfNull(m_functionValues);
-            }
         }
 
         virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override

@@ -488,6 +480,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             //after multiplication the structure is lost
             m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
         }
+
+        virtual void AllocateGradientMatricesForChildren(MatrixPool& matrixPool) override
+        {
+            //this is a special handling case. We need to allocate sparse matrix directly instead of from pool.
+            if (m_children[0]->NeedGradient() && Inputs(1)->FunctionValues().GetMatrixType() == SPARSE)
+            {
+                CreateMatrixIfNull(Inputs(0)->GradientValuesPtr());
+                Inputs(0)->GradientValues().SwitchToMatrixType(SPARSE, MatrixFormat::matrixFormatSparseBlockCol, false);
+            }
+
+            //we need to call base allocation at end since we will need to allocate special ones first
+            //so that the default allocator will not allocate it again.
+            Base::AllocateGradientMatricesForChildren(matrixPool);
+        }
     };
 
     template class TimesNode<float>;

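The ordering comment above is the key invariant: the special-format gradient is created before Base::AllocateGradientMatricesForChildren runs, so the default allocator sees it as already allocated and skips it. A stand-alone sketch of that ordering with hypothetical mini-types (not CNTK's API):

#include <memory>

struct Buf { bool allocated = false; bool sparse = false; };

struct Pool
{
    // Default allocator: only fills in buffers nobody claimed yet.
    void Request(Buf& b) { if (!b.allocated) b.allocated = true; }
};

void AllocateGradients(Buf& denseInputGrad, bool rhsIsSparse, Pool& pool)
{
    // Step 1: claim the special case first. For dense * sparse, the gradient
    // w.r.t. the dense input is itself sparse (block-column), so it must be
    // created directly, never handed out by the dense pool.
    if (rhsIsSparse && !denseInputGrad.allocated)
    {
        denseInputGrad.allocated = true;
        denseInputGrad.sparse = true; // stands in for SwitchToMatrixType(SPARSE, ...)
    }
    // Step 2: only then run the default allocator; it skips anything already
    // allocated, so the sparse choice survives.
    pool.Request(denseInputGrad);
}
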
@@ -30,8 +30,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     void Release(shared_ptr<Matrix<ElemType>> freeMatrix)
     {
         vector<shared_ptr<Matrix<ElemType>>>& releasedMatrices = GetReleasedMatrices<ElemType>();
-        if (freeMatrix == nullptr)
-            RuntimeError("MatrixPool::Release: freeMatrix should not be null.");
+        if (freeMatrix == nullptr || freeMatrix->GetMatrixType() == SPARSE)
+            RuntimeError("MatrixPool::Release: freeMatrix should not be null or sparse.");
 #ifdef _DEBUG
         for (int i = 0; i < releasedMatrices.size(); i++)
         {

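A simplified sketch of the pool contract this check now enforces: Release() parks a dense buffer for reuse and Request() hands one back out, so sparse matrices must never enter. MiniPool is illustrative only, not CNTK's MatrixPool:

#include <memory>
#include <vector>

template <class T>
class MiniPool
{
    std::vector<std::shared_ptr<T>> m_free;
public:
    void Release(std::shared_ptr<T> m) { m_free.push_back(std::move(m)); }

    std::shared_ptr<T> Request()
    {
        if (m_free.empty())
            return std::make_shared<T>(); // pool empty: allocate fresh
        auto m = m_free.back();           // reuse most recently released buffer
        m_free.pop_back();
        return m;
    }
};

If a sparse matrix slipped in, Request() could hand a sparse buffer to a caller expecting dense storage, which is why Release() rejects it up front.
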
@@ -113,9 +113,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     //else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
     {
         m_blockSize = 0;
+        m_blockIdShift = 0;
         m_pArray = NULL;
         m_blockIds = NULL;
     }
+    m_nzValues = NULL;
 }
 
 //should only be used by constructors.

@@ -166,34 +168,109 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         return *this;
     }
 
+    //move constructor, shallow copy
+    template<class ElemType>
+    CPUSparseMatrix<ElemType>::CPUSparseMatrix(CPUSparseMatrix<ElemType>&& moveFrom)
+    {
+        m_format = moveFrom.m_format;
+        m_numRows = moveFrom.m_numRows;
+        m_numCols = moveFrom.m_numCols;
+        m_elemSizeAllocated = moveFrom.m_elemSizeAllocated;
+        m_compIndexSize = moveFrom.m_compIndexSize;
+        m_externalBuffer = moveFrom.m_externalBuffer;
+        m_computeDevice = moveFrom.m_computeDevice;
+        m_nz = moveFrom.m_nz;
+        m_matrixName = moveFrom.m_matrixName;
+
+        m_colIdx = moveFrom.m_colIdx;
+        m_pArray = moveFrom.m_pArray;
+        m_nzValues = moveFrom.m_nzValues;
+        m_unCompIndex = moveFrom.m_unCompIndex;
+        m_compIndex = moveFrom.m_compIndex;
+
+        m_blockSize = moveFrom.m_blockSize;
+        m_blockIdShift = moveFrom.m_blockIdShift;
+        m_blockIds = moveFrom.m_blockIds;
+
+        //release the pointer from the source object so that the destructor won't release it twice
+        moveFrom.ZeroInit();
+    }
+
+    //move assignment operator, shallow copy
+    template<class ElemType>
+    CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::operator=(CPUSparseMatrix<ElemType>&& moveFrom)
+    {
+        if (this != &moveFrom)
+        {
+            if (OwnBuffer())
+                ReleaseMemory(); //always delete the data pointer since we will use the pointer from moveFrom
+
+            m_format = moveFrom.m_format;
+            m_numRows = moveFrom.m_numRows;
+            m_numCols = moveFrom.m_numCols;
+            m_elemSizeAllocated = moveFrom.m_elemSizeAllocated;
+            m_compIndexSize = moveFrom.m_compIndexSize;
+            m_externalBuffer = moveFrom.m_externalBuffer;
+            m_computeDevice = moveFrom.m_computeDevice;
+            m_nz = moveFrom.m_nz;
+            m_matrixName = moveFrom.m_matrixName;
+
+            m_colIdx = moveFrom.m_colIdx;
+            m_pArray = moveFrom.m_pArray;
+            m_nzValues = moveFrom.m_nzValues;
+            m_unCompIndex = moveFrom.m_unCompIndex;
+            m_compIndex = moveFrom.m_compIndex;
+
+            m_blockSize = moveFrom.m_blockSize;
+            m_blockIdShift = moveFrom.m_blockIdShift;
+            m_blockIds = moveFrom.m_blockIds;
+
+            //release the pointer from the source object so that the destructor won't release it twice
+            moveFrom.ZeroInit();
+        }
+        return *this;
+    }
+
     template<class ElemType>
     CPUSparseMatrix<ElemType>::~CPUSparseMatrix()
     {
-        if (m_matrixName != NULL)
-            delete[] m_matrixName;
-
-        if (m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
-        {
-            delete[] m_pArray;
-            delete[] m_unCompIndex;
-            delete[] m_compIndex;
-        }
-        else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
-        {
-            delete[] m_pArray;
-            delete[] m_blockIds;
-        }
+        ReleaseMemory();
     }
+
+    template<class ElemType>
+    void CPUSparseMatrix<ElemType>::ReleaseMemory()
+    {
+        // If m_externalBuffer is true then this matrix
+        // is simply a view over another matrix. In that
+        // case we shouldn't free anything.
+        if (!m_externalBuffer)
+        {
+            if (m_matrixName != NULL)
+                delete[] m_matrixName;
+
+            if (m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
+            {
+                delete[] m_pArray;
+                m_pArray = nullptr;
+                m_nzValues = nullptr;
+
+                delete[] m_unCompIndex;
+                m_unCompIndex = nullptr;
+
+                delete[] m_compIndex;
+                m_compIndex = nullptr;
+            }
+            else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
+            {
+                delete[] m_pArray;
+                m_pArray = nullptr;
+                m_nzValues = nullptr;
+
+                delete[] m_blockIds;
+                m_blockIds = nullptr;
+            }
+        }
+    }
 
 #pragma endregion Constructors and Destructor
 
 #pragma region Basic Operators

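Both new functions follow the same move idiom: copy the raw pointers shallowly, then ZeroInit() the source so its destructor has nothing left to free. A self-contained illustration of why that ZeroInit() call is load-bearing (the struct is a stand-in, not CPUSparseMatrix):

#include <cstddef>

struct SparseBuf
{
    float* m_pArray = nullptr;
    std::size_t m_nz = 0;

    void ZeroInit() { m_pArray = nullptr; m_nz = 0; }

    SparseBuf() = default;
    SparseBuf(SparseBuf&& moveFrom)
    {
        m_pArray = moveFrom.m_pArray; // shallow copy: take ownership
        m_nz = moveFrom.m_nz;
        moveFrom.ZeroInit();          // source must forget the pointer, or
                                      // ~SparseBuf would delete it twice
    }
    ~SparseBuf() { delete[] m_pArray; }
};

The OwnBuffer()/ReleaseMemory() call in the move assignment serves the mirror purpose: the destination frees its own buffer before adopting the source's, so nothing leaks.
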
@@ -307,15 +384,64 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         if (startColumn + numCols > m_numCols)
             InvalidArgument("The slice (%d+%d) is out of range of the source matrix (%d).", (int)startColumn, (int)numCols, (int)m_numCols);
 
-        if (m_format != MatrixFormat::matrixFormatSparseCSC)
+        if (m_format != MatrixFormat::matrixFormatSparseCSC && m_format != MatrixFormat::matrixFormatSparseBlockCol)
             NOT_IMPLEMENTED;
 
-        CPUSparseMatrix<ElemType> slice(m_format, m_numRows, numCols, m_elemSizeAllocated);
-        slice.m_pArray = m_pArray;
-        slice.m_unCompIndex = m_unCompIndex;
-        slice.m_compIndex = m_compIndex + startColumn; // Just shift the compressed index location to the new startColumn - that's it!
-        slice.m_externalBuffer = true;
-        slice.m_nz = m_nz;
+        CPUSparseMatrix<ElemType> slice(m_format);
+        slice.m_numRows = m_numRows;
+        slice.m_numCols = numCols;
+
+        if (m_format == MatrixFormat::matrixFormatSparseCSC)
+        {
+            slice.m_pArray = m_pArray;
+            slice.m_nzValues = m_pArray + m_compIndex[startColumn]; //note: m_compIndex is always against m_pArray
+            slice.m_unCompIndex = m_unCompIndex;
+            slice.m_compIndex = m_compIndex + startColumn; // Just shift the compressed index location to the new startColumn - that's it!
+            slice.m_externalBuffer = true;
+            slice.m_nz = m_compIndex[startColumn + numCols] - m_compIndex[startColumn];
+            slice.m_elemSizeAllocated = slice.m_nz;
+            slice.m_compIndexSize = numCols + 1;
+        }
+        else if (m_format == MatrixFormat::matrixFormatSparseBlockCol)
+        {
+            long long startColBlock = 0, endColBlock = 0;
+            bool foundStart = false, foundEnd = false;
+            for (size_t j = 0; j < m_blockSize; j++)
+            {
+                if (j > 0)
+                {
+                    assert(m_blockIds[j] > m_blockIds[j - 1]); //assume ids are increasing. Is this valid?
+                }
+
+                if (!foundStart && (long long)m_blockIds[j] - (long long)m_blockIdShift >= (long long)startColumn) // start column with values
+                {
+                    startColBlock = j;
+                    foundStart = true;
+                }
+                else if ((long long)m_blockIds[j] - (long long)m_blockIdShift >= (long long)(startColumn + numCols)) //end column with values
+                {
+                    endColBlock = j;
+                    foundEnd = true;
+                    break;
+                }
+            }
+            if (!foundStart)
+            {
+                startColBlock = (long long)m_blockSize;
+            }
+            if (!foundEnd)
+            {
+                endColBlock = (long long)m_blockSize;
+            }
+
+            slice.m_pArray = m_pArray + startColBlock * m_numRows;
+            slice.m_nzValues = slice.m_pArray;
+            slice.m_blockIds = m_blockIds + startColBlock; //the value stored in the block id is based on the original column numbers
+            slice.m_blockSize = (size_t)max((long long)0, endColBlock - startColBlock);
+            slice.m_blockIdShift = m_blockIdShift + startColumn;
+            slice.m_externalBuffer = true;
+            slice.m_nz = slice.m_blockSize * m_numRows;
+        }
+
+        return slice;
     }

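A worked example of the CSC branch's arithmetic, with a hypothetical compressed index (not data from this commit). In CSC, column c's nonzeros occupy [compIndex[c], compIndex[c+1]) of the value array, so slicing is pure pointer shifting:

#include <cassert>

int main()
{
    int compIndex[] = { 0, 2, 3, 5, 6 }; // 4 columns, 6 nonzeros total
    int startColumn = 1, numCols = 2;    // view over columns 1..2

    // The slice just points into the parent arrays:
    //   slice.m_compIndex = m_compIndex + startColumn   -> {2, 3, 5}
    //   slice.m_nzValues  = m_pArray + compIndex[1]     -> skips column 0's 2 entries
    int nz = compIndex[startColumn + numCols] - compIndex[startColumn];
    assert(nz == 3); // columns 1 and 2 hold 1 + 2 nonzeros
    return 0;
}

Note the slice sets m_externalBuffer = true: it is a view, so ReleaseMemory() above will deliberately free nothing when the slice is destroyed.
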
@@ -439,22 +565,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         if (keepExistingValues && m_nz > 0)
         {
             assert(m_compIndexSize > 0 && m_nz < numNZElemToReserve);
-            memcpy(pArray, m_pArray, NzSize());
+            memcpy(pArray, m_nzValues, NzSize());
             memcpy(unCompIndex, m_unCompIndex, MajorIndexSize());
             memcpy(compIndex, m_compIndex, SecondaryIndexSize());
         }
 
-        if (m_pArray != NULL)
-            delete [] m_pArray;
-        if (m_unCompIndex != NULL)
-            delete [] m_unCompIndex;
-        if (m_compIndex != NULL)
-            delete [] m_compIndex;
-        m_pArray = NULL;
-        m_unCompIndex = NULL;
-        m_compIndex = NULL;
+        delete [] m_pArray;
+        delete [] m_unCompIndex;
+        delete [] m_compIndex;
 
         m_pArray = pArray;
+        m_nzValues = m_pArray;
         m_unCompIndex = unCompIndex;
         m_compIndex = compIndex;
     }

@@ -469,18 +590,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         if (keepExistingValues && m_elemSizeAllocated > 0)
        {
             assert(m_compIndexSize > 0 && m_elemSizeAllocated < numNZElemToReserve);
-            memcpy(blockVal, m_pArray, NzSize());
+            memcpy(blockVal, m_nzValues, NzSize());
             memcpy(blockIds, m_blockIds, sizeof(size_t)*m_compIndexSize);
         }
 
-        if (m_pArray != NULL)
-            delete[] m_pArray;
-        if (m_blockIds != NULL)
-            delete[] m_blockIds;
-        m_pArray = NULL;
-        m_blockIds = NULL;
+        delete[] m_pArray;
+        delete[] m_blockIds;
 
         m_pArray = blockVal;
+        m_nzValues = m_pArray;
         m_blockIds = blockIds;
     }

@@ -496,6 +614,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         m_nz = 0;
         m_colIdx = -1;
         m_blockSize = 0;
+        m_blockIdShift = 0;
     }
 
     //c = alpha*op(lhs) * op(rhs) + beta*c

@@ -712,7 +831,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     {
         for (size_t j = 0; j < lhs.m_blockSize; j++)
         {
-            size_t i = lhs.m_blockIds[j];
+            size_t i = lhs.m_blockIds[j] - lhs.m_blockIdShift;
             size_t len = (lhs.m_format == MatrixFormat::matrixFormatSparseBlockCol) ? lhs.GetNumRows() : lhs.GetNumCols();
             size_t start = j * len;
             for (size_t p = start; p < start+len; p++)

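m_blockIds stores original column numbers, so after a slice, every consumer must recover the local index by subtracting m_blockIdShift — which is exactly the one-line change in this hunk and the two below it. A tiny worked example with made-up ids:

#include <cassert>
#include <cstddef>

int main()
{
    std::size_t blockIds[] = { 6, 8, 9 }; // original columns holding values
    std::size_t blockIdShift = 5;         // slice starts at original column 5

    std::size_t localCol0 = blockIds[0] - blockIdShift;
    assert(localCol0 == 1);               // original column 6 is column 1 inside the slice
    return 0;
}
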
@@ -771,7 +890,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     {
         for (size_t j = 0; j < m_blockSize; j++)
         {
-            size_t i = m_blockIds[j];
+            size_t i = m_blockIds[j] - m_blockIdShift;
             size_t len = (m_format == MatrixFormat::matrixFormatSparseBlockCol) ? GetNumRows() : GetNumCols();
             size_t start = j * len;
             for (size_t p = start; p < start+len; p++)

@@ -834,7 +953,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         size_t p = 0;
         for (long j = 0; j < m_blockSize; j++)
         {
-            size_t colOrRow = m_blockIds[j];
+            size_t colOrRow = m_blockIds[j] - m_blockIdShift;
             for (long i = 0; i < len; i++, p++)
             {
                 ElemType val = m_pArray[p];

@@ -1063,18 +1182,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         if (sizeof(ElemType) == sizeof(double))
         {
 #ifndef USE_MKL
-            return (ElemType)dasum((int)this->NzCount(), reinterpret_cast<double*>(m_pArray), 1);
+            return (ElemType)dasum((int)this->NzCount(), reinterpret_cast<double*>(m_nzValues), 1);
 #else
-            return (ElemType)cblas_dasum((int)this->NzCount(), reinterpret_cast<double*>(m_pArray), 1);
+            return (ElemType)cblas_dasum((int)this->NzCount(), reinterpret_cast<double*>(m_nzValues), 1);
 #endif
         }
         else
         {
 #pragma warning (suppress: 4244)
 #ifndef USE_MKL
-            return sasum((int)this->NzCount(), reinterpret_cast<float*>(m_pArray), 1);
+            return sasum((int)this->NzCount(), reinterpret_cast<float*>(m_nzValues), 1);
 #else
-            return cblas_sasum((int)this->NzCount(), reinterpret_cast<float*>(m_pArray), 1);
+            return cblas_sasum((int)this->NzCount(), reinterpret_cast<float*>(m_nzValues), 1);
 #endif
         }
     }

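The switch from m_pArray to m_nzValues matters precisely for column slices, where the nonzeros start partway into the shared buffer and NzCount() covers only the slice's own window. What the asum calls compute, spelled out without BLAS:

#include <cmath>
#include <cstddef>

// Equivalent of cblas_dasum(n, x, 1) over the slice's nonzero window.
double SumOfAbsSketch(const double* nzValues, std::size_t nzCount)
{
    double sum = 0.0;
    for (std::size_t i = 0; i < nzCount; i++)
        sum += std::fabs(nzValues[i]);
    return sum;
}

Passing m_pArray here would sum the parent's leading nonzeros instead of the slice's, silently returning the wrong norm.
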
@@ -1217,6 +1336,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     template CPUSparseMatrix<char>::CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size);
     template CPUSparseMatrix<char>::CPUSparseMatrix(MatrixFormat);
     template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char> const &);
+    template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char> &&);
+    template CPUSparseMatrix<char>& CPUSparseMatrix<char>::operator=(CPUSparseMatrix<char>&& moveFrom);
     template void CPUSparseMatrix<char>::SetValue(size_t, size_t, char);
     template char* CPUSparseMatrix<char>::BufferPointer() const;
     template void CPUSparseMatrix<char>::Reset(void);

@@ -33,14 +33,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     private:
         void ZeroInit();
         void CheckInit(const MatrixFormat format);
+        void ReleaseMemory();
 
     public:
         CPUSparseMatrix(const MatrixFormat format);
         CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size);
         CPUSparseMatrix(const CPUSparseMatrix<ElemType>& deepCopyFrom);                      //copy constructor, deep copy
         CPUSparseMatrix<ElemType>& operator=(const CPUSparseMatrix<ElemType>& deepCopyFrom); //assignment operator, deep copy
 
         CPUSparseMatrix(CPUSparseMatrix<ElemType>&& moveFrom);                               //move constructor, shallow copy
         CPUSparseMatrix<ElemType>& operator=(CPUSparseMatrix<ElemType>&& moveFrom);          //move assignment operator, shallow copy
         ~CPUSparseMatrix();
 
     public:

@@ -137,8 +138,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
     public:
-        const ElemType* NzValues() const { return m_pArray; }
-        inline ElemType* NzValues() { return m_pArray; }
+        const ElemType* NzValues() const { return m_nzValues; }
+        inline ElemType* NzValues() { return m_nzValues; }
         size_t NzSize() const { return sizeof(ElemType)*m_nz; } // actual number of element bytes in use
 
         CPUSPARSE_INDEX_TYPE* MajorIndexLocation() const { return m_unCompIndex; } //this is the major index, row/col ids in CSC/CSR format

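A sketch of the pointer relationship behind the NzValues() change, using a hypothetical buffer: a full matrix has m_nzValues == m_pArray, but a slice shares m_pArray and exposes only its own window through m_nzValues:

#include <cassert>
#include <cstddef>

int main()
{
    float buffer[6] = { 1, 2, 3, 4, 5, 6 };  // all nonzeros, 4 columns' worth
    float* m_pArray = buffer;                // full shared storage
    float* m_nzValues = buffer + 2;          // slice starts after 2 entries
    std::size_t m_nz = 3;                    // slice holds 3 nonzeros

    assert(m_nzValues[0] == 3.0f);           // first nonzero of the slice
    assert(m_nzValues + m_nz <= m_pArray + 6);
    return 0;
}
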
@@ -169,6 +170,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     private:
         int m_colIdx; //used to SetValue()
         size_t m_compIndexSize;
+        ElemType* m_nzValues;
 
         //non-zero values are stored in m_pArray
         CPUSPARSE_INDEX_TYPE *m_unCompIndex; //row/col ids in CSC/CSR format

@@ -176,6 +178,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         size_t m_blockSize; //block size
         size_t *m_blockIds; //block ids
+        size_t m_blockIdShift; //used to get efficient slice, actual col = blockIds[j] - m_blockIdShift
     };
 
     typedef CPUSparseMatrix<float> CPUSingleSparseMatrix;

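Taken together, these members describe the sparse block-column layout the commit keeps threading m_blockIdShift through. A simplified summary struct, for orientation only (assumed layout, not the real class):

#include <cstddef>

// Each nonempty column is stored as one dense block of numRows values;
// m_blockIds maps block j back to its (original) column number.
struct BlockColLayoutSketch
{
    float*       m_pArray;       // m_blockSize dense blocks, each numRows long
    std::size_t* m_blockIds;     // original column number of block j
    std::size_t  m_blockSize;    // number of stored (nonempty) columns
    std::size_t  m_blockIdShift; // slice offset: local col = m_blockIds[j] - m_blockIdShift

    // Pointer to the dense data of block j.
    float* BlockData(std::size_t j, std::size_t numRows) { return m_pArray + j * numRows; }
};
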
@@ -893,8 +893,8 @@ already there from last epoch
 Starting minibatch loop.
 randomordering: 21 retries for 100 elements (21.0%) to ensure window condition
 randomordering: recached sequence for seed 11: 6, 31, ...
-Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37213734; EvalErr[0]PerSample = 0.00000000; TotalTime = 0.65604s; TotalTimePerSample = 6.56038ms; SamplesPerSecond = 152
-Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37213734; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=0.656382
+Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37077690; EvalErr[0]PerSample = 0.00000000; TotalTime = 0.65604s; TotalTimePerSample = 6.56038ms; SamplesPerSecond = 152
+Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37077689; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=0.656382
 CNTKCommandTrainEnd: Train

@@ -2269,8 +2269,8 @@ reading from record 0 to 100 to be positioned properly for epoch
 Starting minibatch loop.
 randomordering: 21 retries for 100 elements (21.0%) to ensure window condition
 randomordering: recached sequence for seed 11: 6, 31, ...
-Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37792297; EvalErr[0]PerSample = 0.00000000; TotalTime = 1.34518s; TotalTimePerSample = 13.45185ms; SamplesPerSecond = 74
-Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37792295; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=1.371377
+Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37650299; EvalErr[0]PerSample = 0.00000000; TotalTime = 1.34518s; TotalTimePerSample = 13.45185ms; SamplesPerSecond = 74
+Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37650299; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=1.371377
 CNTKCommandTrainEnd: Train

@@ -864,8 +864,8 @@ already there from last epoch
 Starting minibatch loop.
 randomordering: 21 retries for 100 elements (21.0%) to ensure window condition
 randomordering: recached sequence for seed 11: 6, 31, ...
-Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37213734; EvalErr[0]PerSample = 0.00000000; TotalTime = 0.08724s; TotalTimePerSample = 0.87241ms; SamplesPerSecond = 1146
-Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37213734; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=0.087336
+Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37077690; EvalErr[0]PerSample = 0.00000000; TotalTime = 0.08724s; TotalTimePerSample = 0.87241ms; SamplesPerSecond = 1146
+Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37077689; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=0.087336
 CNTKCommandTrainEnd: Train

@@ -2182,8 +2182,8 @@ reading from record 0 to 100 to be positioned properly for epoch
 Starting minibatch loop.
 randomordering: 21 retries for 100 elements (21.0%) to ensure window condition
 randomordering: recached sequence for seed 11: 6, 31, ...
-Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37792297; EvalErr[0]PerSample = 0.00000000; TotalTime = 0.89367s; TotalTimePerSample = 8.93670ms; SamplesPerSecond = 111
-Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37792295; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=0.908817
+Epoch[12 of 12]-Minibatch[ 1- 10 of 10]: SamplesSeen = 100; TrainLossPerSample = 0.37650299; EvalErr[0]PerSample = 0.00000000; TotalTime = 0.89367s; TotalTimePerSample = 8.93670ms; SamplesPerSecond = 111
+Finished Epoch[12 of 12]: [Training Set] TrainLossPerSample = 0.37650299; EvalErrPerSample = 0; Ave LearnRatePerSample = 0.004999999888; EpochTime=0.908817
 CNTKCommandTrainEnd: Train

@@ -5,6 +5,7 @@ deviceId=$DeviceId$
 ndlMacros=$ConfigDir$/Macros.ndl
 
 parallelTrain=false
+NumCPUThreads=8
 
 Train=[
     action=train

@@ -1,9 +1,9 @@
 dataDir: ../Data
 tags:
   # running on every BVT job in 'I' (Image) leg:
-  - bvt-i os=='windows' or device=='gpu'
+  - bvt-i device=='gpu'
   # running every Nightly job in 'I' leg
-  - nightly-i os=='windows' or device=='gpu'
+  - nightly-i device=='gpu'
 
 testCases:
   CNTK Run must be completed: