Change the SparseInputValue node and the CPU sparse matrix to make LM CPU training work.

Dong Yu 2015-01-17 13:37:00 -08:00
Parent 459ff2ded1
Commit 43ea68b59e
5 changed files with 58 additions and 122 deletions

View file

@@ -811,10 +811,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return newNode;
}
//sparse matrix size is optionally specified
ComputationNodePtr CreateSparseInputNode(const std::wstring inputName, const size_t rows, const size_t cols, const size_t size = 0)
ComputationNodePtr CreateSparseInputNode(const std::wstring inputName, const size_t rows, const size_t cols)
{
ComputationNodePtr newNode(new SparseInputValue<ElemType>(rows, cols, size, m_deviceId, inputName));
ComputationNodePtr newNode(new SparseInputValue<ElemType>(rows, cols, m_deviceId, inputName));
AddNodeToNet(newNode);
return newNode;
}
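For callers the only visible change is the dropped element-count hint. A minimal sketch of the new call shape, using hypothetical stand-in types rather than the real CNTK classes:

    #include <cstddef>
    #include <memory>
    #include <string>

    // Stand-ins for illustration only; the real factory lives on ComputationNetwork.
    struct Node { size_t rows; size_t cols; std::wstring name; };
    using NodePtr = std::shared_ptr<Node>;

    NodePtr CreateSparseInputNode(const std::wstring& name, size_t rows, size_t cols)
    {
        // The sparse node now sizes its own storage after construction
        // (see ConvertToSparseMatrix in the next file), so no size hint is needed.
        return std::make_shared<Node>(Node{rows, cols, name});
    }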

View file

@@ -1150,112 +1150,38 @@ protected: \
template class InputValue<double>;
template<class ElemType>
class SparseInputValue : public ComputationNode<ElemType>
class SparseInputValue : public InputValue<ElemType>
{
UsingComputationNodeMembers;
public:
SparseInputValue (size_t rows, size_t cols, size_t size, const DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
SparseInputValue (size_t rows, size_t cols, const DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const std::wstring name = L"") : InputValue<ElemType>(rows, cols, deviceId, name)
{
if (rows * cols == 0)
throw std::logic_error("This InputValue dimension is 0.");
m_outputWidth = 1;
m_outputHeight = rows;
m_outputChannels = 1;
m_nodeName = (name == L""? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
m_functionValues.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC);
m_functionValues.Resize(rows, cols, size);
m_needGradient = false;
InitRecurrentNode();
ConvertToSparseMatrix();
}
SparseInputValue (size_t imageWidth, size_t imageHeight, size_t imageChannels, size_t numImages, const DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const std::wstring name = L"")
: ComputationNode<ElemType>(deviceId)
: InputValue<ElemType>(imageWidth, imageHeight, imageChannels, numImages, deviceId, name)
{
size_t rows = imageWidth * imageHeight * imageChannels;
size_t cols = numImages;
if (rows * cols == 0)
throw std::logic_error("This InputValue dimension is 0.");
m_outputWidth = imageWidth;
m_outputHeight = imageHeight;
m_outputChannels = imageChannels;
m_nodeName = (name == L""? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
m_functionValues.SwitchToMatrixType(MatrixType::SPARSE);
m_functionValues.Resize(rows, cols);
m_needGradient = false;
InitRecurrentNode();
}
SparseInputValue (File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode<ElemType>(deviceId)
{
m_nodeName = (name == L""? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
ConvertToSparseMatrix();
}
virtual void SaveToFile(File& fstream) const
SparseInputValue (File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, const std::wstring name = L"") : InputValue<ElemType>(fstream, modelVersion, deviceId, name)
{
ComputationNode<ElemType>::SaveToFile(fstream);
fstream << FunctionValues().GetNumRows() << FunctionValues().GetNumCols();
fstream << FunctionValues().GetAllocatedSize();
fstream << m_outputWidth << m_outputHeight << m_outputChannels;
ConvertToSparseMatrix();
}
virtual void LoadFromFile(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX)
{
ComputationNode<ElemType>::LoadFromFile(fstream, modelVersion, deviceId);
size_t rows, cols;
fstream >> rows >> cols;
if (rows * cols == 0)
throw std::logic_error("This InputValue dimension is 0.");
size_t size; //sparse matrix size
fstream >> size;
fstream >> m_outputWidth >> m_outputHeight >> m_outputChannels;
m_functionValues.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC);
m_functionValues.Resize(rows, cols, size);
m_needGradient = false;
InputValue<ElemType>::LoadFromFile(fstream, modelVersion, deviceId);
ConvertToSparseMatrix();
}
virtual const std::wstring OperationName() const {return TypeName();}
static const std::wstring TypeName() {return L"SparseInputValue";}
virtual void EvaluateThisNode() {}
virtual void EvaluateThisNode(const size_t /*timeIdxInSeq*/) {}
virtual void ComputeInputPartial(const size_t /*inputIndex*/) {}
virtual void ComputeInputPartial(const size_t /*inputIndex*/, const size_t /*timeIdxInSeq*/) {}
virtual void Validate()
{
PrintSelfBeforeValidation();
//CopyImageSizeFromInputs(); //not necessary since InputValue are leafs. put it here for consistent
}
virtual void DumpNodeInfo(const bool printValues, File& fstream) const
{
ComputationNode<ElemType>::DumpNodeInfo(printValues, fstream);
char str[4096];
sprintf(str, "[%lu,%lu]", FunctionValues().GetNumRows(), FunctionValues().GetNumCols());
fstream << string(str);
}
// copy constructor
SparseInputValue (const SparseInputValue <ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode<ElemType>(node->m_deviceId)
SparseInputValue (const SparseInputValue <ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags) : InputValue<ElemType>(node, newName, flags)
{
node->CopyTo(this, newName, flags);
}
virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
@@ -1266,11 +1192,15 @@ protected: \
return node;
}
virtual TaskDescriptor<ElemType>* GetPTaskDescriptor(TaskType /*taskType*/, size_t inputIndex=0) const
private:
void ConvertToSparseMatrix()
{
inputIndex;
return nullptr;
size_t rows = m_functionValues.GetNumRows();
size_t cols = m_functionValues.GetNumCols();
m_functionValues.SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC);
m_functionValues.Resize(rows, cols); //SwitchToMatrixType does not reserve information right now.
}
};
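The rewritten class boils down to one pattern: let the dense InputValue constructor do all validation and sizing, then flip the storage to sparse CSC and re-apply the dimensions, since the format switch drops them (hence the diff's comment that SwitchToMatrixType does not reserve that information). A standalone sketch of that pattern, with stand-in types rather than the CNTK Matrix:

    #include <cstddef>
    #include <iostream>
    #include <stdexcept>

    enum class MatrixType { Dense, SparseCSC };

    // Stand-in matrix: switching the storage format resets the dimensions,
    // mirroring the behavior the diff works around.
    struct Matrix {
        size_t rows = 0, cols = 0;
        MatrixType type = MatrixType::Dense;
        void Resize(size_t r, size_t c) { rows = r; cols = c; }
        void SwitchToMatrixType(MatrixType t) { type = t; rows = cols = 0; }
    };

    struct DenseInput {
        Matrix values;
        DenseInput(size_t rows, size_t cols) {
            if (rows * cols == 0)
                throw std::logic_error("This InputValue dimension is 0.");
            values.Resize(rows, cols);
        }
    };

    // Derived sparse input: reuse the dense construction, then convert.
    struct SparseInput : DenseInput {
        SparseInput(size_t rows, size_t cols) : DenseInput(rows, cols) {
            ConvertToSparseMatrix();
        }
    private:
        void ConvertToSparseMatrix() {
            // Cache the dimensions, switch the format, re-apply them:
            // the same three steps as the private helper in the diff.
            size_t r = values.rows, c = values.cols;
            values.SwitchToMatrixType(MatrixType::SparseCSC);
            values.Resize(r, c);
        }
    };

    int main() {
        SparseInput in(10000, 128);
        std::cout << in.values.rows << " x " << in.values.cols << "\n"; // 10000 x 128
    }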

View file

@@ -28,8 +28,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input = nullptr, w = nullptr, b = nullptr, u = nullptr, delay = nullptr, output = nullptr, label = nullptr, prior = nullptr;
//TODO: to figure out sparse matrix size
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize, 0);
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize);
m_net->FeatureNodes().push_back(input);
if (m_applyMeanVarNorm)
@@ -628,8 +628,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr ot=nullptr, it=nullptr, ft=nullptr, gt=nullptr, ct=nullptr, ht=nullptr;
ComputationNodePtr delayXI = nullptr, delayXII = nullptr, delayXIII = nullptr, delayXIV = nullptr;
//TODO: to figure out sparse matrix size
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize, 0);
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize);
m_net->FeatureNodes().push_back(input);
if (m_applyMeanVarNorm)
@@ -739,8 +738,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
w = m_net->CreateSparseLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers], 0);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
// b = m_net->CreateLearnableParameter(msra::strfun::wstrprintf (L"B%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], 1);
//TODO: to figure out sparse matrix size
label = m_net->CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize, 0);
label = m_net->CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize);
AddTrainAndEvalCriterionNodes(input, label, w);
output = m_net->Times(w, input);
@@ -1102,7 +1100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr directWIO = nullptr, directInput=nullptr, directOutput=nullptr;
ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = {nullptr};
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize, m_layerSizes[0] * mbSize);
input = m_net->CreateSparseInputNode(L"features", m_layerSizes[0], mbSize);
m_net->FeatureNodes().push_back(input);
if (m_applyMeanVarNorm)

View file

@@ -101,14 +101,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_nz = 0;
m_matrixName = NULL;
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
//if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
{
m_colIdx = -1;
m_pArray = NULL;
m_unCompIndex = NULL;
m_compIndex = NULL;
}
else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
//else if (m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
m_blockSize = 0;
m_blockVal = NULL;
@@ -247,8 +247,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t *unCompIndex = new size_t[numNZElemToReserve];
size_t *compIndex = new size_t[newCompIndexSize];
if (keepExistingValues && (m_nz > numNZElemToReserve || m_compIndexSize > newCompIndexSize))
throw std::logic_error("Resize: To keep values m_nz should <= numNZElemToReserve and m_compIndexSize <= newCompIndexSize");
if (keepExistingValues && m_nz > 0)
{
assert(m_compIndexSize > 0 && m_nz < numNZElemToReserve);
memcpy(pArray, m_pArray, sizeof(ElemType)*m_nz);
memcpy(unCompIndex, m_unCompIndex, sizeof(size_t)*m_nz);
memcpy(compIndex, m_compIndex, sizeof(size_t)*m_compIndexSize);
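The old Resize simply threw when asked to keep data; the new path copies the existing nonzeros and the compressed column index into the freshly allocated, larger buffers. A standalone sketch of that grow-and-copy step for CSC storage (no ownership or destructor bookkeeping from the real class; note the diff asserts a strict m_nz < numNZElemToReserve):

    #include <cassert>
    #include <cstddef>
    #include <cstring>

    struct CscBuffers {
        double* vals = nullptr;   // nonzero values          (m_pArray)
        size_t* rowIdx = nullptr; // row index per nonzero   (m_unCompIndex)
        size_t* colPtr = nullptr; // compressed column index (m_compIndex)
        size_t nz = 0, colPtrSize = 0, cap = 0;

        void Resize(size_t nzToReserve, size_t newColPtrSize, bool keepExistingValues) {
            double* newVals = new double[nzToReserve];
            size_t* newRowIdx = new size_t[nzToReserve];
            size_t* newColPtr = new size_t[newColPtrSize];
            if (keepExistingValues && nz > 0) {
                // Only legal when growing: everything must fit in the new buffers.
                assert(colPtrSize > 0 && nz <= nzToReserve && colPtrSize <= newColPtrSize);
                memcpy(newVals, vals, sizeof(double) * nz);
                memcpy(newRowIdx, rowIdx, sizeof(size_t) * nz);
                memcpy(newColPtr, colPtr, sizeof(size_t) * colPtrSize);
            }
            delete[] vals; delete[] rowIdx; delete[] colPtr;
            vals = newVals; rowIdx = newRowIdx; colPtr = newColPtr;
            cap = nzToReserve; colPtrSize = newColPtrSize;
        }
    };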
@@ -270,9 +274,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ElemType *blockVal = new ElemType[numNZElemToReserve];
size_t *blockIds = new size_t[newCompIndexSize];
if (keepExistingValues && (m_nz > numNZElemToReserve || m_compIndexSize > newCompIndexSize))
throw std::logic_error("Resize: To keep values m_nz should <= numNZElemToReserve and m_compIndexSize <= newCompIndexSize");
if (keepExistingValues && m_elemSizeAllocated > 0)
{
memcpy(blockVal, m_blockVal, sizeof(ElemType)*m_elemSizeAllocated);
assert(m_compIndexSize > 0 && m_elemSizeAllocated < numNZElemToReserve);
memcpy(blockVal, m_blockVal, sizeof(ElemType)*m_nz);
memcpy(blockIds, m_blockIds, sizeof(size_t)*m_compIndexSize);
}
@@ -296,18 +304,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
m_nz = 0;
m_colIdx = -1;
m_compIndexSize = 0;
m_blockSize = 0;
}
//c = op(a) * op(this) or c += op(a) * op(this)
//c = alpha*op(lhs) * op(rhs) + beta*c
template<class ElemType>
void CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, ElemType beta, CPUMatrix<ElemType>& c)
{
if (lhs.IsEmpty() || rhs.IsEmpty())
throw std::logic_error("LeftMultiplyAndAdd: one of the input matrix is empty.");
throw std::logic_error("MultiplyAndWeightedAdd: one of the input matrix is empty.");
int m = transposeA? (int)lhs.GetNumCols(): (int)lhs.GetNumRows();
int k = transposeA? (int)lhs.GetNumRows(): (int)lhs.GetNumCols();
@@ -318,7 +325,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
assert (k == l);
if (k != l)
{
throw std::invalid_argument("CPUSparseMatrix::MultiplyAndAdd: The inner dimensions of a and b must match.");
throw std::invalid_argument("CPUSparseMatrix::MultiplyAndWeightedAdd: The inner dimensions of a and b must match.");
}
if (c.GetNumRows() != m || c.GetNumCols() != n)
@@ -330,7 +337,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
memset(c.GetArray(), 0, sizeof(ElemType) * c.GetNumElements());
}
else
else if (beta != 1)
{
#pragma omp parallel for
foreach_coord(i,j,c)
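Note the else becoming else if (beta != 1): with beta == 1 (plain accumulation, the common case here) the elementwise scaling pass over all of c is now skipped. The three beta regimes of c = alpha*op(lhs)*op(rhs) + beta*c, in isolation:

    #include <cstring>
    #include <vector>

    // Pre-scale the dense output c for a given beta, as the diff does:
    // beta == 0 zeroes the buffer, beta == 1 leaves it untouched (the new
    // fast path), anything else scales every element once before accumulation.
    void PreScaleOutput(std::vector<double>& c, double beta) {
        if (beta == 0)
            std::memset(c.data(), 0, sizeof(double) * c.size());
        else if (beta != 1)
            for (double& x : c)
                x *= beta;
    }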
@@ -339,15 +346,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
if (rhs.GetFormat() != matrixFormatSparseCSC)
NOT_IMPLEMENTED;
if (!transposeA && !transposeB)
{
for(size_t j = 0; j < rhs.GetNumCols(); j++)
{
size_t start = rhs.m_compIndex[j];
size_t start = rhs.m_compIndex[j]; //ColLocation
size_t end = rhs.m_compIndex[j+1];
for(size_t p = start; p < end; p++)
{
size_t i = rhs.m_unCompIndex[p];
size_t i = rhs.m_unCompIndex[p]; //RowLocation
ElemType val = rhs.m_pArray[p];
for(size_t h = 0; h < lhs.GetNumRows(); h++)
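The annotated loop is the textbook CSC walk: m_compIndex brackets each column's run of nonzeros, and m_unCompIndex holds the row of each stored value. The same traversal as a self-contained dense-times-CSC product (column-major dense storage, matching the index math above):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // c (m x n) += alpha * lhs (m x k) * rhs (k x n), with rhs in CSC form.
    void DenseTimesCsc(size_t m, size_t k, size_t n, double alpha,
                       const std::vector<double>& lhs,    // m*k, column-major
                       const std::vector<size_t>& colPtr, // n+1 column offsets
                       const std::vector<size_t>& rowIdx, // one row index per nonzero
                       const std::vector<double>& vals,   // one value per nonzero
                       std::vector<double>& c)            // m*n, column-major
    {
        assert(lhs.size() == m * k && colPtr.size() == n + 1 && c.size() == m * n);
        for (size_t j = 0; j < n; j++) {          // each output column
            size_t start = colPtr[j];             // ColLocation
            size_t end = colPtr[j + 1];
            for (size_t p = start; p < end; p++) {
                size_t i = rowIdx[p];             // RowLocation: row of rhs(i, j)
                double val = vals[p];
                for (size_t h = 0; h < m; h++)    // rank-1 update with lhs column i
                    c[j * m + h] += alpha * lhs[i * m + h] * val;
            }
        }
    }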
@@ -385,7 +395,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
//c = alpha * op(a) * op(this)
//c = alpha * op(lhs) * op(rhs)
template<class ElemType>
void CPUSparseMatrix<ElemType>::MultiplyAndAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, CPUSparseMatrix<ElemType>& c)
@@ -414,10 +424,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else if (!transposeA && transposeB)
{
//allocate enough memory
if(c.m_elemSizeAllocated < lhs.GetNumElements())
{
c.Resize(c.GetNumRows(), c.GetNumCols(), lhs.GetNumElements());
}
c.SetFormat(matrixFormatSparseBlockCol);
c.Resize(c.GetNumRows(), c.GetNumCols(), lhs.GetNumElements());
map<size_t, size_t> w2Id;
for(size_t j = 0; j < rhs.GetNumCols(); j++)
@@ -460,7 +468,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
throw std::logic_error("sparse matrix out of range.");
}
c.SetFormat(matrixFormatSparseBlockCol);
//c.SetFormat(matrixFormatSparseBlockCol);
}
else if (transposeA && !transposeB)
{
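These two hunks move SetFormat(matrixFormatSparseBlockCol) ahead of Resize and comment out the trailing call. The reason follows from the Resize hunks above: Resize allocates different buffers depending on the current format (value/index arrays for CSC, block values and ids for the block formats), so the layout must be chosen before the buffers are sized, not after they are filled. In miniature:

    #include <cstddef>

    struct SparseOut {
        enum Fmt { CSC, BlockCol } fmt = CSC;
        size_t cscCap = 0, blockCap = 0;
        void SetFormat(Fmt f) { fmt = f; }
        void Resize(size_t nzToReserve) {
            // Stand-in for the per-format allocation in CPUSparseMatrix::Resize.
            if (fmt == CSC) cscCap = nzToReserve;
            else            blockCap = nzToReserve;
        }
    };

    void PrepareBlockColOutput(SparseOut& c, size_t nzNeeded) {
        c.SetFormat(SparseOut::BlockCol); // choose the layout first...
        c.Resize(nzNeeded);               // ...then size that layout's buffers
    }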
@@ -552,8 +560,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//allocate enough memory
if(etp.m_elemSizeAllocated < etp.GetNumElements())
{
etp.Resize(etp.GetNumRows(), etp.GetNumCols(), etp.GetNumElements());
etp.Resize(etp.GetNumRows(), etp.GetNumCols(), etp.GetNumElements(), true, false);
}
etp.Reset();
entropyScore(0, 0) = 0;
for(size_t j = 0; j < label.GetNumCols(); j++)
{
@@ -655,11 +665,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const CPUMatrix<ElemType>& /*idx2cls*/,
CPUSparseMatrix<ElemType>& grd)
{
grd.SetFormat(matrixFormatSparseBlockRow);
//allocate enough memory
if(grd.m_elemSizeAllocated < error.m_nz*input.GetNumRows())
{
grd.Resize(grd.GetNumRows(), grd.GetNumCols(), error.m_nz*input.GetNumRows());
}
grd.Resize(grd.GetNumRows(), grd.GetNumCols(), error.m_nz*input.GetNumRows(), true, false);
grd.Reset();
map<size_t, size_t> w2Id;
for(size_t j = 0; j < error.GetNumCols(); j++)
@@ -701,7 +710,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
throw std::logic_error("sparse matrix out of range.");
}
grd.SetFormat(matrixFormatSparseBlockRow);
//grd.SetFormat(matrixFormatSparseBlockRow);
}
// normal update for smoothed gradients c and current gradients (this)

View file

@@ -365,10 +365,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
stream << s << format;
stream<<us.m_numRows<<us.m_numCols;
ElemType *m_pArray = us.CopyToArray();
ElemType *pArray = us.CopyToArray();
for (size_t i=0;i<us.GetNumElements();++i)
stream<<m_pArray[i];
delete[] m_pArray;
stream<<pArray[i];
delete[] pArray;
stream.PutMarker(fileMarkerEndSection, std::wstring(L"EMAT"));
return stream;
}
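The last change fixes a shadowing hazard rather than a leak: the local copy returned by CopyToArray was named m_pArray, hiding the member of the same name, so the delete[] read as if it freed the matrix's own storage. Renaming the local to pArray makes the ownership obvious. The hazard in miniature:

    #include <cstddef>

    struct Mat {
        double* m_pArray = nullptr;   // owned storage
        size_t n = 0;
        double* CopyToArray() const { // caller owns the returned copy
            double* p = new double[n];
            for (size_t i = 0; i < n; i++)
                p[i] = m_pArray[i];
            return p;
        }
    };

    void Serialize(const Mat& us) {
        // Before the fix this local was also called m_pArray, shadowing the
        // member above; a later edit could easily confuse the two pointers.
        double* pArray = us.CopyToArray();
        // ... stream n and the elements ...
        delete[] pArray; // frees the copy, never us.m_pArray
    }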