From 3a2f92acf2269143d40ecaa0c7bf91c388b402c2 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Mon, 17 Aug 2015 10:24:15 -0700 Subject: [PATCH] Added Print function for QuantizedMatrix to aid debugging --- Math/CNTKMathTest/MatrixQuantizerTests.cpp | 37 +++++++++++- Math/Math/ColumnQuantizer.h | 3 + Math/Math/Math.vcxproj.filters | 10 +--- Math/Math/Matrix.h | 3 + Math/Math/QuantizedMatrix.cpp | 67 ++++++++++++++++++++++ Math/Math/QuantizedMatrix.h | 3 + 6 files changed, 115 insertions(+), 8 deletions(-) diff --git a/Math/CNTKMathTest/MatrixQuantizerTests.cpp b/Math/CNTKMathTest/MatrixQuantizerTests.cpp index 5f5870e5e..f56badfad 100644 --- a/Math/CNTKMathTest/MatrixQuantizerTests.cpp +++ b/Math/CNTKMathTest/MatrixQuantizerTests.cpp @@ -5,6 +5,9 @@ // #include "stdafx.h" #include "CppUnitTest.h" +#include "File.h" +#include +#include #include "..\Math\MatrixQuantizer.h" #include "..\Math\CUDAPageLockedMemAllocator.h" @@ -17,6 +20,20 @@ using namespace Microsoft::MSR::CNTK; using namespace Microsoft::MSR::CNTK; using namespace Microsoft::VisualStudio::CppUnitTestFramework; +//#define DEBUG_OUTPUT_PATH L"E:/temp/MatrixQuantizerTest.out.txt" + +#pragma warning (disable: 4996) + +void RedirectStdErr(wstring logpath) +{ + fprintf(stderr, "Redirecting stderr to file %S\n", logpath.c_str()); + auto f = make_shared(logpath.c_str(), fileOptionsWrite | fileOptionsText); + if (dup2(fileno(*f), 2) == -1) + RuntimeError("unexpected failure to redirect stderr to log file"); + setvbuf(stderr, NULL, _IONBF, 16384); // unbuffer it + static auto fKept = f; // keep it around (until it gets changed) +} + namespace CNTKMathTest { TEST_CLASS(MatrixQuantizerTests) @@ -130,14 +147,28 @@ namespace CNTKMathTest ElemType* gpuPrevResidualMatrix = quantizer->GetResidualMatrix().CopyToArray(); ElemType *gpuPrevOutMatrix = outMatrix.CopyToArray(); - QuantizedMatrix tempCPUQuantizationBuffer(numRows, numCols, 1, CPUDEVICE, allocator); +#ifdef DEBUG_OUTPUT_PATH + inMatrix.Print("Input Matrix", 0, 2, 0, 2); + quantizer->GetResidualMatrix().Print("Old Residual Matrix", 0, 2, 0, 2); + outMatrix.Print("Old Output Matrix", 0, 2, 0, 2); +#endif + QuantizedMatrix tempCPUQuantizationBuffer(numRows, numCols, 1, CPUDEVICE, allocator); quantizer->QuantizeAsync(tempCPUQuantizationBuffer); quantizer->WaitQuantizeAsyncDone(); +#ifdef DEBUG_OUTPUT_PATH + tempCPUQuantizationBuffer.Print("Quantized Matrix", 0, 2, 0, 2); + quantizer->GetResidualMatrix().Print("New residual Matrix", 0, 2, 0, 2); +#endif + quantizer->UnquantizeAsync(tempCPUQuantizationBuffer, outMatrix, (iterNum > 0)); quantizer->WaitUnquantizeAsyncDone(); +#ifdef DEBUG_OUTPUT_PATH + outMatrix.Print("Unquantized Output Matrix", 0, 2, 0, 2); +#endif + // Now verify the quantization results ElemType* gpuNewResidualMatrix = quantizer->GetResidualMatrix().CopyToArray(); ElemType* gpuNewOutMatrix = outMatrix.CopyToArray(); @@ -251,6 +282,10 @@ namespace CNTKMathTest //This test will fail without GPU TEST_METHOD(Matrix1BitQuantize) { +#ifdef DEBUG_OUTPUT_PATH + RedirectStdErr(DEBUG_OUTPUT_PATH); +#endif + // Test single precision 1bit quantization on CPU Test1BitQuantization(CPUDEVICE); diff --git a/Math/Math/ColumnQuantizer.h b/Math/Math/ColumnQuantizer.h index e3b62e092..e34aa9c59 100644 --- a/Math/Math/ColumnQuantizer.h +++ b/Math/Math/ColumnQuantizer.h @@ -316,6 +316,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { private: ValueQuantizer valQ; + + template + friend class QuantizedMatrix; }; }}} diff --git a/Math/Math/Math.vcxproj.filters b/Math/Math/Math.vcxproj.filters index bc8a1d478..88be01b2c 100644 --- a/Math/Math/Math.vcxproj.filters +++ b/Math/Math/Math.vcxproj.filters @@ -25,7 +25,7 @@ - CPU\1bitSGD + GPU\1bitSGD @@ -57,13 +57,9 @@ - CPU\1bitSGD - - - CPU\1bitSGD - + - CPU\1bitSGD + GPU\1bitSGD diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index dd19d9688..06f6f0bca 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -472,6 +472,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template friend class MatrixQuantizer; + + template + friend class QuantizedMatrix; }; typedef Matrix SingleMatrix; diff --git a/Math/Math/QuantizedMatrix.cpp b/Math/Math/QuantizedMatrix.cpp index 752056f7e..fd1b1fe07 100644 --- a/Math/Math/QuantizedMatrix.cpp +++ b/Math/Math/QuantizedMatrix.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "QuantizedMatrix.h" +#include "ColumnQuantizer.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -107,6 +108,72 @@ namespace Microsoft { namespace MSR { namespace CNTK { return QuantizedMatrix(this->GetNumRows(), numCols, this->GetNumBits(), matrixSliceData); } + template + void QuantizedMatrix::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) + { + if ((GetNumRows() == 0) || (GetNumCols() == 0)) + { + throw std::logic_error("Print: QuantizedMatrix is empty."); + } + + if (rowEnd >= GetNumRows() || colEnd >= GetNumCols()) + { + throw std::invalid_argument("Index out of range."); + } + + if (this->GetNumBits() != 1) + { + throw std::logic_error("QuantizedMatrix::Print is currently only supported for 1 bit."); + } + + DEVICEID_TYPE orgdevice = this->GetDeviceId(); + CurrentDataLocation curLocation = m_quantizedData->GetCurrentMatrixLocation(); + if (curLocation == CurrentDataLocation::GPU) + { + m_quantizedData->_transferToDevice(CPUDEVICE, false, false); + } + + if (matrixName != nullptr) + fprintf(stderr, "\n###### %s (%lu, %lu) ######\n", matrixName, GetNumRows(), GetNumCols()); + else + fprintf(stderr, "\n###### Unnamed Matrix (%lu, %lu) ######\n", GetNumRows(), GetNumCols()); + + fprintf(stderr, "\n------ Print Range (%lu:%lu, %lu:%lu) ------\n", rowStart, rowEnd, colStart, colEnd); + + for (size_t j = colStart; j <= colEnd; j++) + { + QuantizedColumn* qCol = this->GetQuantizedColumn(j); + fprintf(stderr, "Lower=%.10f,Upper=%.10f\t", qCol->lower, qCol->upper); + } + fprintf(stderr, "\n"); + + const size_t ldNbits = ValueQuantizer::ld(this->GetNumBits()); + size_t numQWordsPerCol = ColumnQuantizer::QWordsPerCol(this->GetNumRows(), this->GetNumBits()); + for (size_t i = rowStart; i <= rowEnd; i++) + { + size_t qWordIdx = i % numQWordsPerCol; + size_t offsetInQWord = i / numQWordsPerCol; + for (size_t j = colStart; j <= colEnd; j++) + { + QuantizedColumn* qCol = this->GetQuantizedColumn(j); + ColumnQuantizer q(ldNbits, qCol->lower, qCol->upper); + ElemType val0 = q.valQ.Unquantize(0); + ElemType val1 = q.valQ.Unquantize(1); + + QWord qWord = qCol->bits[qWordIdx]; + bool qVal = ((qWord >> offsetInQWord) & 1) != 0; + ElemType val = ValueQuantizer::Unquantize1(qVal, val0, val1); + fprintf(stderr, "%1d (%.10f) \t", qVal ? 1 : 0, val); + } + fprintf(stderr, "\n"); + } + + if (curLocation == CurrentDataLocation::GPU) + { + m_quantizedData->_transferToDevice(orgdevice, false, false); + } + } + // Explicit instantiation template class QuantizedMatrix; template class QuantizedMatrix; diff --git a/Math/Math/QuantizedMatrix.h b/Math/Math/QuantizedMatrix.h index 96d79ffc4..327ae2746 100644 --- a/Math/Math/QuantizedMatrix.h +++ b/Math/Math/QuantizedMatrix.h @@ -51,6 +51,7 @@ public: template class MATH_API QuantizedMatrix { + typedef typename ValueQuantizer::QWord QWord; static const size_t QWordNumBits = ValueQuantizer::QWordNumBits; public: @@ -94,6 +95,8 @@ public: QuantizedMatrix ColumnSlice(size_t startColumn, size_t numCols) const; + void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd); + private: // Private constructor for creating quantized matrix column slices QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, Matrix* data);