Added Print function for QuantizedMatrix to aid debugging

This commit is contained in:
Amit Agarwal 2015-08-17 10:24:15 -07:00
Родитель 70114245ea
Коммит 3a2f92acf2
6 изменённых файлов: 115 добавлений и 8 удалений

Просмотреть файл

@ -5,6 +5,9 @@
//
#include "stdafx.h"
#include "CppUnitTest.h"
#include "File.h"
#include <memory>
#include <io.h>
#include "..\Math\MatrixQuantizer.h"
#include "..\Math\CUDAPageLockedMemAllocator.h"
@ -17,6 +20,20 @@ using namespace Microsoft::MSR::CNTK;
using namespace Microsoft::MSR::CNTK;
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
//#define DEBUG_OUTPUT_PATH L"E:/temp/MatrixQuantizerTest.out.txt"
#pragma warning (disable: 4996)
// Redirects the process-wide stderr (file descriptor 2) to the text file at `logpath`.
//
// Used by the tests when DEBUG_OUTPUT_PATH is defined so that the matrix Print()
// output, which is written to stderr, is captured in a file.
//
// logpath: path of the log file; it is opened for writing in text mode.
// Calls RuntimeError() if the descriptor cannot be duplicated onto stderr.
void RedirectStdErr(wstring logpath)
{
    fprintf(stderr, "Redirecting stderr to file %S\n", logpath.c_str());
    auto f = make_shared<File>(logpath.c_str(), fileOptionsWrite | fileOptionsText);

    // Push out anything still pending on the old destination before fd 2 is swapped.
    fflush(stderr);

    if (dup2(fileno(*f), 2) == -1)
        RuntimeError("unexpected failure to redirect stderr to log file");
    setvbuf(stderr, nullptr, _IONBF, 16384); // unbuffer it

    // Keep the File object around until the next redirection replaces it.
    // The static is declared separately and assigned on every call: a
    // `static auto fKept = f;` initializer would only run on the first call,
    // so later redirections would never update the kept handle.
    static std::shared_ptr<File> fKept;
    fKept = f;
}
namespace CNTKMathTest
{
TEST_CLASS(MatrixQuantizerTests)
@ -130,14 +147,28 @@ namespace CNTKMathTest
ElemType* gpuPrevResidualMatrix = quantizer->GetResidualMatrix().CopyToArray();
ElemType *gpuPrevOutMatrix = outMatrix.CopyToArray();
QuantizedMatrix<ElemType> tempCPUQuantizationBuffer(numRows, numCols, 1, CPUDEVICE, allocator);
#ifdef DEBUG_OUTPUT_PATH
inMatrix.Print("Input Matrix", 0, 2, 0, 2);
quantizer->GetResidualMatrix().Print("Old Residual Matrix", 0, 2, 0, 2);
outMatrix.Print("Old Output Matrix", 0, 2, 0, 2);
#endif
QuantizedMatrix<ElemType> tempCPUQuantizationBuffer(numRows, numCols, 1, CPUDEVICE, allocator);
quantizer->QuantizeAsync(tempCPUQuantizationBuffer);
quantizer->WaitQuantizeAsyncDone();
#ifdef DEBUG_OUTPUT_PATH
tempCPUQuantizationBuffer.Print("Quantized Matrix", 0, 2, 0, 2);
quantizer->GetResidualMatrix().Print("New residual Matrix", 0, 2, 0, 2);
#endif
quantizer->UnquantizeAsync(tempCPUQuantizationBuffer, outMatrix, (iterNum > 0));
quantizer->WaitUnquantizeAsyncDone();
#ifdef DEBUG_OUTPUT_PATH
outMatrix.Print("Unquantized Output Matrix", 0, 2, 0, 2);
#endif
// Now verify the quantization results
ElemType* gpuNewResidualMatrix = quantizer->GetResidualMatrix().CopyToArray();
ElemType* gpuNewOutMatrix = outMatrix.CopyToArray();
@ -251,6 +282,10 @@ namespace CNTKMathTest
//This test will fail without GPU
TEST_METHOD(Matrix1BitQuantize)
{
#ifdef DEBUG_OUTPUT_PATH
RedirectStdErr(DEBUG_OUTPUT_PATH);
#endif
// Test single precision 1bit quantization on CPU
Test1BitQuantization<float>(CPUDEVICE);

Просмотреть файл

@ -316,6 +316,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
private:
ValueQuantizer<ElemType> valQ;
template<typename T>
friend class QuantizedMatrix;
};
}}}

Просмотреть файл

@ -25,7 +25,7 @@
<ClCompile Include="MatrixQuantizer.cpp" />
<ClCompile Include="QuantizedMatrix.cpp" />
<ClCompile Include="CUDAPageLockedMemAllocator.cpp">
<Filter>CPU\1bitSGD</Filter>
<Filter>GPU\1bitSGD</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
@ -57,13 +57,9 @@
</ClInclude>
<ClInclude Include="MatrixQuantizer.h" />
<ClInclude Include="QuantizedMatrix.h" />
<Filter>CPU\1bitSGD</Filter>
</ClInclude>
<ClInclude Include="MemAllocator.h">
<Filter>CPU\1bitSGD</Filter>
</ClInclude>
<ClInclude Include="MemAllocator.h" />
<ClInclude Include="CUDAPageLockedMemAllocator.h">
<Filter>CPU\1bitSGD</Filter>
<Filter>GPU\1bitSGD</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>

Просмотреть файл

@ -472,6 +472,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<typename T>
friend class MatrixQuantizer;
template<typename T>
friend class QuantizedMatrix;
};
typedef Matrix<float> SingleMatrix;

Просмотреть файл

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "QuantizedMatrix.h"
#include "ColumnQuantizer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
@ -107,6 +108,72 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return QuantizedMatrix<ElemType>(this->GetNumRows(), numCols, this->GetNumBits(), matrixSliceData);
}
// Debug helper: writes a rectangular window of this quantized matrix to stderr.
//
// matrixName: optional label printed in the header; may be nullptr.
// rowStart, rowEnd: first and last row to print (both inclusive).
// colStart, colEnd: first and last column to print (both inclusive).
//
// For every printed column the lower/upper quantization bounds are emitted first,
// followed by one line per row showing each element's quantized bit and the value
// it unquantizes to.
//
// Throws std::logic_error if the matrix is empty or is not 1-bit quantized, and
// std::invalid_argument if rowEnd or colEnd lies outside the matrix.
//
// If the data currently resides on the GPU it is transferred to the CPU for the
// duration of the call and transferred back to the original device afterwards.
template<class ElemType>
void QuantizedMatrix<ElemType>::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd)
{
    if ((GetNumRows() == 0) || (GetNumCols() == 0))
    {
        throw std::logic_error("Print: QuantizedMatrix is empty.");
    }

    if (rowEnd >= GetNumRows() || colEnd >= GetNumCols())
    {
        throw std::invalid_argument("Index out of range.");
    }

    if (this->GetNumBits() != 1)
    {
        throw std::logic_error("QuantizedMatrix::Print is currently only supported for 1 bit.");
    }

    // Make sure the quantized data is readable from host code.
    // NOTE(review): the meaning of the two boolean arguments to _transferToDevice is
    // not visible here -- confirm against Matrix<char>::_transferToDevice.
    DEVICEID_TYPE orgdevice = this->GetDeviceId();
    CurrentDataLocation curLocation = m_quantizedData->GetCurrentMatrixLocation();
    if (curLocation == CurrentDataLocation::GPU)
    {
        m_quantizedData->_transferToDevice(CPUDEVICE, false, false);
    }

    // size_t must not be printed with "%lu": on 64-bit Windows unsigned long is
    // 32 bits wide while size_t is 64 bits. Widen explicitly and use "%llu".
    typedef unsigned long long PrintableSize;
    const PrintableSize numRowsToShow = static_cast<PrintableSize>(GetNumRows());
    const PrintableSize numColsToShow = static_cast<PrintableSize>(GetNumCols());

    if (matrixName != nullptr)
        fprintf(stderr, "\n###### %s (%llu, %llu) ######\n", matrixName, numRowsToShow, numColsToShow);
    else
        fprintf(stderr, "\n###### Unnamed Matrix (%llu, %llu) ######\n", numRowsToShow, numColsToShow);

    fprintf(stderr, "\n------ Print Range (%llu:%llu, %llu:%llu) ------\n",
            static_cast<PrintableSize>(rowStart), static_cast<PrintableSize>(rowEnd),
            static_cast<PrintableSize>(colStart), static_cast<PrintableSize>(colEnd));

    // Header line: quantization range of every column in the window.
    for (size_t j = colStart; j <= colEnd; j++)
    {
        QuantizedColumn<ElemType>* qCol = this->GetQuantizedColumn(j);
        fprintf(stderr, "Lower=%.10f,Upper=%.10f\t", qCol->lower, qCol->upper);
    }
    fprintf(stderr, "\n");

    const size_t ldNbits = ValueQuantizer<ElemType>::ld(this->GetNumBits());
    size_t numQWordsPerCol = ColumnQuantizer<ElemType>::QWordsPerCol(this->GetNumRows(), this->GetNumBits());
    for (size_t i = rowStart; i <= rowEnd; i++)
    {
        // Row i is read from QWord (i % numQWordsPerCol) of its column,
        // at bit position (i / numQWordsPerCol) within that QWord.
        size_t qWordIdx = i % numQWordsPerCol;
        size_t offsetInQWord = i / numQWordsPerCol;
        for (size_t j = colStart; j <= colEnd; j++)
        {
            QuantizedColumn<ElemType>* qCol = this->GetQuantizedColumn(j);

            // The two values a bit can unquantize to depend on the column's range.
            ColumnQuantizer<ElemType> q(ldNbits, qCol->lower, qCol->upper);
            ElemType val0 = q.valQ.Unquantize(0);
            ElemType val1 = q.valQ.Unquantize(1);

            QWord qWord = qCol->bits[qWordIdx];
            bool qVal = ((qWord >> offsetInQWord) & 1) != 0;
            ElemType val = ValueQuantizer<ElemType>::Unquantize1(qVal, val0, val1);
            fprintf(stderr, "%1d (%.10f) \t", qVal ? 1 : 0, val);
        }
        fprintf(stderr, "\n");
    }

    // Restore the data to the device it lived on before printing.
    if (curLocation == CurrentDataLocation::GPU)
    {
        m_quantizedData->_transferToDevice(orgdevice, false, false);
    }
}
// Explicit instantiation
// Member functions such as Print() are defined in this source file rather than in
// the header, so the class template is instantiated here for the element types
// float and double.
template class QuantizedMatrix<float>;
template class QuantizedMatrix<double>;

Просмотреть файл

@ -51,6 +51,7 @@ public:
template<class ElemType>
class MATH_API QuantizedMatrix
{
typedef typename ValueQuantizer<ElemType>::QWord QWord;
static const size_t QWordNumBits = ValueQuantizer<ElemType>::QWordNumBits;
public:
@ -94,6 +95,8 @@ public:
QuantizedMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd);
private:
// Private constructor for creating quantized matrix column slices
QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, Matrix<char>* data);