Added code to verify that MaskColumnsValue is unnecessary (a no-op for a zero mask value) for CPU and GPU sparse matrices.
This commit is contained in:
Родитель
53f76944ac
Коммит
fe47cde839
|
@ -266,6 +266,39 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
|
|||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val)
|
||||
{
|
||||
VerifyWritable(__func__);
|
||||
|
||||
size_t n = GetNumCols();
|
||||
if (n != columnsMask.GetNumCols())
|
||||
RuntimeError("Matrix and column mask must have equal number of columns");
|
||||
|
||||
if (val != 0)
|
||||
RuntimeError("MaskColumnsValue is not implmented for a non-zero mask for sparse matrices.");
|
||||
|
||||
|
||||
// Get the binary columns mask
|
||||
char* maskedCols = columnsMask.Data();
|
||||
|
||||
if (GetFormat() == MatrixFormat::matrixFormatSparseCSC)
|
||||
{
|
||||
// If we're CSC, we only need to verify that the columns to be zeroed are empty.
|
||||
GPUSPARSE_INDEX_TYPE* colVector = SecondaryIndexLocation();
|
||||
|
||||
#pragma omp parallel for
|
||||
for (long j = 0; j < n; j++)
|
||||
if (maskedCols[j] == 0 && colVector[j + 1] != colVector[j])
|
||||
RuntimeError("GPUSparseMatrix attempted to mask column %d, but it has %d elements in it.", j, (colVector[j + 1] - colVector[j]));
|
||||
}
|
||||
else
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::Print(const char* matrixName) const
|
||||
{
|
||||
|
|
|
@ -83,6 +83,7 @@ public:
|
|||
|
||||
void SetValue(const size_t row, const size_t col, ElemType val);
|
||||
void SetValue(const CPUSparseMatrix<ElemType>& /*val*/);
|
||||
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
|
||||
|
||||
size_t BufferSize() const
|
||||
{
|
||||
|
|
|
@ -492,6 +492,59 @@ void GPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& denseMatrix,
|
|||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
GPUSPARSE_INDEX_TYPE* GPUSparseMatrix<ElemType>::GetCondensedVector() const
|
||||
{
|
||||
if (GetFormat() == MatrixFormat::matrixFormatSparseCSC || GetFormat() == MatrixFormat::matrixFormatSparseCSR)
|
||||
{
|
||||
PrepareDevice();
|
||||
GPUSPARSE_INDEX_TYPE* pArray = new GPUSPARSE_INDEX_TYPE[SecondaryIndexCount()];
|
||||
CUDA_CALL(cudaMemcpy(pArray, SecondaryIndexLocation(), sizeof(GPUSPARSE_INDEX_TYPE) * SecondaryIndexCount(), cudaMemcpyDeviceToHost));
|
||||
return pArray;
|
||||
}
|
||||
else
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void GPUSparseMatrix<ElemType>::MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val)
|
||||
{
|
||||
VerifyWritable(__func__);
|
||||
|
||||
size_t n = GetNumCols();
|
||||
if (n != columnsMask.GetNumCols())
|
||||
RuntimeError("Matrix and column mask must have equal number of columns");
|
||||
|
||||
if (val != 0)
|
||||
RuntimeError("MaskColumnsValue is not implmented for a non-zero mask for sparse matrices.");
|
||||
|
||||
// We could do this on the GPU, but for now C++ is easier.
|
||||
|
||||
// Download the binary columns mask
|
||||
char* maskedCols = columnsMask.CopyToArray();
|
||||
|
||||
if (GetFormat() == MatrixFormat::matrixFormatSparseCSC)
|
||||
{
|
||||
// If we're CSC, we only need to verify that the columns to be zeroed are empty, since val == 0.
|
||||
// So just download the condensed column vector.
|
||||
GPUSPARSE_INDEX_TYPE* colVector = GetCondensedVector();
|
||||
|
||||
// Verify that if the column is to be masked, there are no elements in it.
|
||||
#pragma omp parallel for
|
||||
for (long j = 0; j < n; j++)
|
||||
if (maskedCols[j] == 0 && colVector[j + 1] != colVector[j])
|
||||
RuntimeError("GPUSparseMatrix attempted to mask column %d, but it has %d elements in it.", j, (colVector[j + 1] - colVector[j]));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class ElemType>
|
||||
GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::operator=(const GPUSparseMatrix<ElemType>& deepCopy)
|
||||
{
|
||||
|
|
|
@ -285,6 +285,9 @@ public:
|
|||
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
|
||||
|
||||
GPUSPARSE_INDEX_TYPE* GetCondensedVector() const;
|
||||
void MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val);
|
||||
|
||||
void Reshape(const size_t numRows, const size_t numCols);
|
||||
void ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly = true);
|
||||
|
|
|
@ -1152,12 +1152,11 @@ void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemTyp
|
|||
else if (GetDeviceId() != columnsMask.GetDeviceId() && columnsMask.GetCurrentMatrixLocation() != BOTH)
|
||||
RuntimeError("MaskColumnsValue: Matrix and column mask must be on the same device.");
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(this,
|
||||
this,
|
||||
m_CPUMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val),
|
||||
m_GPUMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val),
|
||||
NOT_IMPLEMENTED,
|
||||
NOT_IMPLEMENTED);
|
||||
DISPATCH_MATRIX_ON_FLAG(this, this,
|
||||
{ m_CPUMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val); },
|
||||
{ m_GPUMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val); },
|
||||
{ m_CPUSparseMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val); },
|
||||
{ m_GPUSparseMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val); });
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
|
Загрузка…
Ссылка в новой задаче