Added Print function for QuantizedMatrix to aid debugging

2015-08-17 10:24:15 -07:00 · 2015-08-17 10:24:15 -07:00 · 3a2f92acf2
--- a/Math/CNTKMathTest/MatrixQuantizerTests.cpp
+++ b/Math/CNTKMathTest/MatrixQuantizerTests.cpp
@ -5,6 +5,9 @@
 //
 #include "stdafx.h"
 #include "CppUnitTest.h"
+#include "File.h"
+#include <memory>
+#include <io.h>

 #include "..\Math\MatrixQuantizer.h"
 #include "..\Math\CUDAPageLockedMemAllocator.h"
@ -17,6 +20,20 @@ using namespace Microsoft::MSR::CNTK;
 using namespace Microsoft::MSR::CNTK;
 using namespace Microsoft::VisualStudio::CppUnitTestFramework;

+//#define DEBUG_OUTPUT_PATH L"E:/temp/MatrixQuantizerTest.out.txt"
+
+#pragma warning (disable: 4996)
+
+// Redirects the process's stderr (file descriptor 2) to the text file at logpath.
+// A notice naming the target file is written to the current stderr first.
+// Calls RuntimeError() if duplicating the file's descriptor onto 2 fails.
+void RedirectStdErr(wstring logpath)
+{
+    fprintf(stderr, "Redirecting stderr to file %S\n", logpath.c_str());
+    auto f = make_shared<File>(logpath.c_str(), fileOptionsWrite | fileOptionsText);
+    if (dup2(fileno(*f), 2) == -1)
+        RuntimeError("unexpected failure to redirect stderr to log file");
+    setvbuf(stderr, NULL, _IONBF, 16384);   // unbuffer it
+    // A function-local static is initialized only on the first call, so the handle is
+    // assigned (not initialized) here; a later redirect then replaces the one kept before.
+    static shared_ptr<File> fKept;
+    fKept = f;                              // keep it around (until it gets changed)
+}
+
 namespace CNTKMathTest
 {
    TEST_CLASS(MatrixQuantizerTests)
@ -130,14 +147,28 @@ namespace CNTKMathTest
                ElemType* gpuPrevResidualMatrix = quantizer->GetResidualMatrix().CopyToArray();
                ElemType *gpuPrevOutMatrix = outMatrix.CopyToArray();

-                QuantizedMatrix<ElemType> tempCPUQuantizationBuffer(numRows, numCols, 1, CPUDEVICE, allocator);
+#ifdef DEBUG_OUTPUT_PATH
+                inMatrix.Print("Input Matrix", 0, 2, 0, 2);
+                quantizer->GetResidualMatrix().Print("Old Residual Matrix", 0, 2, 0, 2);
+                outMatrix.Print("Old Output Matrix", 0, 2, 0, 2);
+#endif

+                QuantizedMatrix<ElemType> tempCPUQuantizationBuffer(numRows, numCols, 1, CPUDEVICE, allocator);
                quantizer->QuantizeAsync(tempCPUQuantizationBuffer);
                quantizer->WaitQuantizeAsyncDone();

+#ifdef DEBUG_OUTPUT_PATH
+                tempCPUQuantizationBuffer.Print("Quantized Matrix", 0, 2, 0, 2);
+                quantizer->GetResidualMatrix().Print("New residual Matrix", 0, 2, 0, 2);
+#endif
+
                quantizer->UnquantizeAsync(tempCPUQuantizationBuffer, outMatrix, (iterNum > 0));
                quantizer->WaitUnquantizeAsyncDone();

+#ifdef DEBUG_OUTPUT_PATH
+                outMatrix.Print("Unquantized Output Matrix", 0, 2, 0, 2);
+#endif
+
                // Now verify the quantization results
                ElemType* gpuNewResidualMatrix = quantizer->GetResidualMatrix().CopyToArray();
                ElemType* gpuNewOutMatrix = outMatrix.CopyToArray();
@ -251,6 +282,10 @@ namespace CNTKMathTest
        //This test will fail without GPU
        TEST_METHOD(Matrix1BitQuantize)
        {
+#ifdef DEBUG_OUTPUT_PATH
+            RedirectStdErr(DEBUG_OUTPUT_PATH);
+#endif
+
            // Test single precision 1bit quantization on CPU
            Test1BitQuantization<float>(CPUDEVICE);

--- a/Math/Math/ColumnQuantizer.h
+++ b/Math/Math/ColumnQuantizer.h
@ -316,6 +316,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {

    private:
        ValueQuantizer<ElemType> valQ;
+
+        template<typename T>
+        friend class QuantizedMatrix;
    };

 }}}
--- a/Math/Math/Math.vcxproj.filters
+++ b/Math/Math/Math.vcxproj.filters
@ -25,7 +25,7 @@
    <ClCompile Include="MatrixQuantizer.cpp" />
    <ClCompile Include="QuantizedMatrix.cpp" />
    <ClCompile Include="CUDAPageLockedMemAllocator.cpp">
-      <Filter>CPU\1bitSGD</Filter>
+      <Filter>GPU\1bitSGD</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
@ -57,13 +57,9 @@
    </ClInclude>
    <ClInclude Include="MatrixQuantizer.h" />
    <ClInclude Include="QuantizedMatrix.h" />
-      <Filter>CPU\1bitSGD</Filter>
-    </ClInclude>
-    <ClInclude Include="MemAllocator.h">
-      <Filter>CPU\1bitSGD</Filter>
-    </ClInclude>
+    <ClInclude Include="MemAllocator.h" />
    <ClInclude Include="CUDAPageLockedMemAllocator.h">
-      <Filter>CPU\1bitSGD</Filter>
+      <Filter>GPU\1bitSGD</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
--- a/Math/Math/Matrix.h
+++ b/Math/Math/Matrix.h
@ -472,6 +472,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        template<typename T>
        friend class MatrixQuantizer;
+
+        template<typename T>
+        friend class QuantizedMatrix;
    };

    typedef Matrix<float> SingleMatrix;
--- a/Math/Math/QuantizedMatrix.cpp
+++ b/Math/Math/QuantizedMatrix.cpp
@ -1,5 +1,6 @@
 #include "stdafx.h"
 #include "QuantizedMatrix.h"
+#include "ColumnQuantizer.h"

 namespace Microsoft { namespace MSR { namespace CNTK {
    
@ -107,6 +108,72 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        return QuantizedMatrix<ElemType>(this->GetNumRows(), numCols, this->GetNumBits(), matrixSliceData);
    }
    
+    // Debugging aid: writes a rectangular window of a 1-bit quantized matrix to stderr.
+    //
+    //   matrixName       - label for the header line; nullptr prints "Unnamed Matrix".
+    //   rowStart, rowEnd - inclusive range of rows to print.
+    //   colStart, colEnd - inclusive range of columns to print.
+    //
+    // For every printed column the stored lower/upper bounds are shown first; then, for each
+    // cell, the raw quantized bit followed by its unquantized value in parentheses.
+    //
+    // Throws std::logic_error if the matrix is empty or is not quantized to 1 bit, and
+    // std::invalid_argument if rowEnd or colEnd lies outside the matrix.
+    template<class ElemType>
+    void QuantizedMatrix<ElemType>::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd)
+    {
+        if ((GetNumRows() == 0) || (GetNumCols() == 0))
+        {
+            throw std::logic_error("Print: QuantizedMatrix is empty.");
+        }
+
+        if (rowEnd >= GetNumRows() || colEnd >= GetNumCols())
+        {
+            throw std::invalid_argument("Index out of range.");
+        }
+
+        if (this->GetNumBits() != 1)
+        {
+            throw std::logic_error("QuantizedMatrix::Print is currently only supported for 1 bit.");
+        }
+
+        // If the quantized data currently lives on the GPU, move it to CPUDEVICE for the
+        // duration of the dump; it is moved back to the original device at the end.
+        DEVICEID_TYPE orgdevice = this->GetDeviceId();
+        CurrentDataLocation curLocation = m_quantizedData->GetCurrentMatrixLocation();
+        if (curLocation == CurrentDataLocation::GPU)
+        {
+            m_quantizedData->_transferToDevice(CPUDEVICE, false, false);
+        }
+
+        // size_t is not the same width as unsigned long on every platform (e.g. 64-bit
+        // Windows), so the values are widened explicitly and printed with %llu, not %lu.
+        const unsigned long long numRowsToReport = GetNumRows();
+        const unsigned long long numColsToReport = GetNumCols();
+        if (matrixName != nullptr)
+            fprintf(stderr, "\n###### %s (%llu, %llu) ######\n", matrixName, numRowsToReport, numColsToReport);
+        else
+            fprintf(stderr, "\n###### Unnamed Matrix (%llu, %llu) ######\n", numRowsToReport, numColsToReport);
+
+        fprintf(stderr, "\n------ Print Range (%llu:%llu, %llu:%llu) ------\n",
+                static_cast<unsigned long long>(rowStart), static_cast<unsigned long long>(rowEnd),
+                static_cast<unsigned long long>(colStart), static_cast<unsigned long long>(colEnd));
+
+        // Per-column quantization bounds.
+        for (size_t j = colStart; j <= colEnd; j++)
+        {
+            QuantizedColumn<ElemType>* qCol = this->GetQuantizedColumn(j);
+            fprintf(stderr, "Lower=%.10f,Upper=%.10f\t", qCol->lower, qCol->upper);
+        }
+        fprintf(stderr, "\n");
+
+        const size_t ldNbits = ValueQuantizer<ElemType>::ld(this->GetNumBits());
+        size_t numQWordsPerCol = ColumnQuantizer<ElemType>::QWordsPerCol(this->GetNumRows(), this->GetNumBits());
+        for (size_t i = rowStart; i <= rowEnd; i++)
+        {
+            // Row i of a column is read from qword (i % numQWordsPerCol), bit (i / numQWordsPerCol).
+            size_t qWordIdx = i % numQWordsPerCol;
+            size_t offsetInQWord = i / numQWordsPerCol;
+            for (size_t j = colStart; j <= colEnd; j++)
+            {
+                QuantizedColumn<ElemType>* qCol = this->GetQuantizedColumn(j);
+                // Rebuild the column's quantizer from its stored bounds to obtain the two
+                // values that a 0 bit and a 1 bit unquantize to.
+                ColumnQuantizer<ElemType> q(ldNbits, qCol->lower, qCol->upper);
+                ElemType val0 = q.valQ.Unquantize(0);
+                ElemType val1 = q.valQ.Unquantize(1);
+
+                QWord qWord = qCol->bits[qWordIdx];
+                bool qVal = ((qWord >> offsetInQWord) & 1) != 0;
+                ElemType val = ValueQuantizer<ElemType>::Unquantize1(qVal, val0, val1);
+                fprintf(stderr, "%1d (%.10f)                   \t", qVal ? 1 : 0, val);
+            }
+            fprintf(stderr, "\n");
+        }
+
+        if (curLocation == CurrentDataLocation::GPU)
+        {
+            m_quantizedData->_transferToDevice(orgdevice, false, false);
+        }
+    }
+
    // Explicit instantiation
    template class QuantizedMatrix<float>;
    template class QuantizedMatrix<double>;    
--- a/Math/Math/QuantizedMatrix.h
+++ b/Math/Math/QuantizedMatrix.h
@ -51,6 +51,7 @@ public:
 template<class ElemType>
 class MATH_API QuantizedMatrix
 {
+    typedef typename ValueQuantizer<ElemType>::QWord QWord;
    static const size_t QWordNumBits = ValueQuantizer<ElemType>::QWordNumBits;

 public:       
@ -94,6 +95,8 @@ public:
    
    QuantizedMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;

+    void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd);
+
 private:
    // Private constructor for creating quantized matrix column slices
    QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, Matrix<char>* data);