First implementation of new sparse*dense for CPU

This commit is contained in:
Thilo Will 2016-08-25 15:56:13 +02:00
Родитель f0aa69d365
Коммит 1810e7cafe
1 изменённых файлов: 150 добавлений и 0 удалений

Просмотреть файл

@ -792,6 +792,156 @@ void CPUSparseMatrix<ElemType>::Reset()
SetBlockSize(0);
SetBlockIdShift(0);
}
template <class ElemType, bool DenseTimesSparse /* false means SparseTimesDense */, bool transposeA, bool transposeB>
void Multiply(ElemType alpha, const CPUSparseMatrix<ElemType>& sparse, const CPUMatrix<ElemType>& dense, ElemType beta, CPUMatrix<ElemType>& c)
{
if (sparse.IsEmpty())
LogicError("MultiplyAndWeightedAdd: sparse input matrix is empty.");
if (dense.IsEmpty())
LogicError("MultiplyAndWeightedAdd: dense input matrix is empty.");
Matrix<ElemType>& lhs = DenseTimesSparse ? dense : sparse;
Matrix<ElemType>& rhs = DenseTimesSparse ? sparse : dense;
// C(m:n) is the product of matrices X * Y where we have the shapes X(m:k) and Y(l:n)
int m = transposeA ? (int)lhs.GetNumCols() : (int)lhs.GetNumRows();
int k = transposeA ? (int)lhs.GetNumRows() : (int)lhs.GetNumCols();
int l = transposeB ? (int)rhs.GetNumCols() : (int)rhs.GetNumRows();
int n = transposeB ? (int)rhs.GetNumRows() : (int)rhs.GetNumCols();
assert(m > 0 && k > 0 && l > 0 && n > 0); // converting from size_t to int may cause overflow
if (k != l)
{
InvalidArgument("CPUSparseMatrix::MultiplyAndWeightedAdd: The inner dimensions of a and b must match.");
}
// Determine the dimension of the outer index of the dense matrix.
int outerDimensionDense;
if ( DenseTimesSparse && !transposeA)
outerDimensionDense = dense.GetNumRows();
else if ( DenseTimesSparse && transposeA)
outerDimensionDense = dense.GetNumCols();
else if (!DenseTimesSparse && !transposeB)
outerDimensionDense = dense.GetNumCols();
else if (!DenseTimesSparse && transposeB)
outerDimensionDense = dense.GetNumRows();
if (beta == 0)
c.RequireSize(m, n);
else
c.VerifySize(m, n); // Can't resize if beta != 0
if (beta == 0)
{
memset(c.Data(), 0, sizeof(ElemType)* c.GetNumElements());
}
else if (beta != 1)
{
#pragma omp parallel for
foreach_coord(i, j, c)
{
c(i, j) = beta * c(i, j);
}
}
// TODO: Implement CSR as a transposition of b, like we do for GPU.
if (rhs.GetFormat() != matrixFormatSparseCSC)
NOT_IMPLEMENTED;
// Do the actual multiplication.
ElemType* valueBuffer = sparse.Buffer() + *sparse.SecondaryIndexLocation(); // Points to the value buffer of the current view (i.e. buffer containing indices of non-zero elements)
int* rowIndexBuffer = sparse.MajorIndexLocation(); // Points to the index buffer of the current view. (i.e. buffer containing indices of non-zero elements)
int iNonzero = 0; // Number of nonzero elements handled so far for curent slice view.
int numPreviosNonzero = sparse.SecondaryIndexLocation()[0]; // Total number of nonzero values handled in previous slices.
// Loop over columns of the sparse matrix
for (size_t colSparse = 0; colSparse < sparse.GetNumCols(); colSparse++)
{
size_t end = sparse.SecondaryIndexLocation()[colSparse + 1] - numPreviosNonzero;
// Loop over the nonzero rows of the current column of the sparse matrix
for (; iNonzero < end; iNonzero++)
{
size_t rowSparse = rowIndexBuffer[iNonzero]; // RowLocation
ElemType sparseVal = valueBuffer[iNonzero];
// Determine the index of the 'outer' dimension of the sparse matrix and the commin inner index.
int outerIndexSparse;
int innerIndex;
if (DenseTimesSparse && !transposeB)
{
outerIndexSparse = colSparse;
innerIndex = rowSparse;
}
else if (DenseTimesSparse && transposeB)
{
outerIndexSparse = rowSparse;
innerIndex = colSparse;
}
else if (!DenseTimesSparse && !transposeA)
{
outerIndexSparse = rowSparse;
innerIndex = colSparse;
}
else if (!DenseTimesSparse && transposeA)
{
outerIndexSparse = colSparse;
innerIndex = rowSparse;
}
// Loop over the outer index of the dense matrix
for (size_t outerIndexDense = 0; outerIndexDense < outerDimensionDense; innerIndex++)
{
// Determine the row index of the dense input matrix
ElemType denseVal;
if (DenseTimesSparse)
{
rowDense = outerIndexDense;
colDense = innerIndex;
denseVal = dense(outerIndexDense, innerIndex);
}
else /*Sparse times dense */
{
denseVal = dense(innerIndex, outerIndexDense);
}
int rowC = DenseTimesSparse ? outerIndexDense : outerIndexSparse;
int colC = DenseTimesSparse ? outerIndexSparse : outerIndexDense;
if (DenseTimesSparse)
c(outerIndexDense, outerIndexSparse) += alpha * denseVal * sparseVal;
else /*Sparse times dense */
c(outerIndexSparse, outerIndexDense ) += alpha * denseVal * sparseVal;
}
}
}
}
// c = alpha*op(lhs) * op(rhs) + beta*c
// dense x sparse = dense