CNTK/Source/Math/QuantizedMatrix.cpp

186 строки
6.4 KiB
C++

#include "stdafx.h"
#include "QuantizedMatrix.h"
#include "ColumnQuantizer.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
QuantizedMatrix<ElemType>::QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, DEVICEID_TYPE deviceId, MemAllocator* allocator /* = nullptr */)
: m_numRows(numRows), m_numCols(numCols), m_numBits(nbits), m_allocator(allocator)
{
m_qColSize = QuantizedColumn<ElemType>::QuantizedColumnSize(m_numBits, m_numRows);
if (((QWordNumBits / m_numBits) * m_numBits) != QWordNumBits)
{
LogicError("Quantization: 'nbits' must be a divisor of 64");
}
if (m_allocator == nullptr)
{
m_quantizedData = new Matrix<char>(m_qColSize, m_numCols, deviceId);
}
else
{
m_quantizedData = new Matrix<char>(m_qColSize, m_numCols, (char*)m_allocator->Malloc(m_qColSize * m_numCols), deviceId, matrixFlagDontOwnBuffer);
}
}
template <class ElemType>
QuantizedMatrix<ElemType>::QuantizedMatrix(QuantizedMatrix<ElemType>&& moveFrom)
: m_quantizedData(moveFrom.m_quantizedData), m_allocator(moveFrom.m_allocator), m_numRows(moveFrom.m_numRows), m_numCols(moveFrom.m_numCols), m_numBits(moveFrom.m_numBits), m_qColSize(moveFrom.m_qColSize)
{
moveFrom.m_quantizedData = nullptr;
moveFrom.m_allocator = nullptr;
}
template <class ElemType>
QuantizedMatrix<ElemType>& QuantizedMatrix<ElemType>::operator=(QuantizedMatrix<ElemType>&& moveFrom)
{
assert(this != &moveFrom);
this->m_quantizedData = moveFrom.m_quantizedData;
this->m_allocator = moveFrom.m_allocator;
this->m_numRows = moveFrom.m_numRows;
this->m_numCols = moveFrom.m_numCols;
this->m_numBits = moveFrom.m_numBits;
this->m_qColSize = moveFrom.m_qColSize;
moveFrom.m_quantizedData = nullptr;
moveFrom.m_allocator = nullptr;
return *this;
}
template <class ElemType>
QuantizedMatrix<ElemType>::QuantizedMatrix(const size_t numRows, const size_t numCols, const size_t nbits, Matrix<char>* data)
: m_numRows(numRows), m_numCols(numCols), m_numBits(nbits), m_quantizedData(data), m_allocator(nullptr)
{
m_qColSize = QuantizedColumn<ElemType>::QuantizedColumnSize(m_numBits, m_numRows);
if (((QWordNumBits / m_numBits) * m_numBits) != QWordNumBits)
{
LogicError("Quantization: 'nbits' must be a divisor of 64");
}
// Make sure that the data matrix has enough space
assert((m_quantizedData->GetNumRows() == m_qColSize) && (m_quantizedData->GetNumCols() >= numCols));
}
template <class ElemType>
QuantizedMatrix<ElemType>::~QuantizedMatrix()
{
if (nullptr != m_quantizedData)
{
// If we used an external allocator, lets free the backing buffer of the matrix
if (m_allocator != nullptr)
{
assert(!m_quantizedData->OwnBuffer());
m_allocator->Free(m_quantizedData->Data());
}
delete m_quantizedData;
m_quantizedData = nullptr;
}
}
template <class ElemType>
int QuantizedMatrix<ElemType>::GetDeviceId() const
{
return m_quantizedData->GetDeviceId();
}
template <class ElemType>
size_t QuantizedMatrix<ElemType>::GetSize() const
{
return m_quantizedData->GetNumElements();
}
template <class ElemType>
char* QuantizedMatrix<ElemType>::Buffer() const
{
return m_quantizedData->Data();
}
template <class ElemType>
QuantizedMatrix<ElemType> QuantizedMatrix<ElemType>::ColumnSlice(size_t startColumn, size_t numCols) const
{
auto matrixSliceData = new Matrix<char>(m_quantizedData->ColumnSlice(startColumn, numCols));
return QuantizedMatrix<ElemType>(this->GetNumRows(), numCols, this->GetNumBits(), matrixSliceData);
}
template <class ElemType>
void QuantizedMatrix<ElemType>::Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd)
{
if ((GetNumRows() == 0) || (GetNumCols() == 0))
{
LogicError("Print: QuantizedMatrix is empty.");
}
if (rowEnd >= GetNumRows() || colEnd >= GetNumCols())
{
InvalidArgument("Index out of range.");
}
DEVICEID_TYPE orgdevice = this->GetDeviceId();
CurrentDataLocation curLocation = m_quantizedData->GetCurrentMatrixLocation();
if (curLocation == CurrentDataLocation::GPU)
{
m_quantizedData->_transferToDevice(CPUDEVICE, false, false);
}
if (matrixName != nullptr)
fprintf(stderr, "\n###### %s (%lu, %lu) ######\n", matrixName, GetNumRows(), GetNumCols());
else
fprintf(stderr, "\n###### Unnamed Matrix (%lu, %lu) ######\n", GetNumRows(), GetNumCols());
fprintf(stderr, "\n------ Print Range (%lu:%lu, %lu:%lu) ------\n", rowStart, rowEnd, colStart, colEnd);
for (size_t j = colStart; j <= colEnd; j++)
{
QuantizedColumn<ElemType>* qCol = this->GetQuantizedColumn(j);
fprintf(stderr, "Lower=%.10f,Upper=%.10f\t", qCol->lower, qCol->upper);
}
fprintf(stderr, "\n");
const size_t ldNbits = ValueQuantizer<ElemType>::ld(this->GetNumBits());
size_t numQWordsPerCol = ColumnQuantizer<ElemType>::QWordsPerCol(this->GetNumRows(), this->GetNumBits());
for (size_t i = rowStart; i <= rowEnd; i++)
{
size_t qWordIdx = i % numQWordsPerCol;
size_t offsetInQWord = i / numQWordsPerCol;
for (size_t j = colStart; j <= colEnd; j++)
{
QuantizedColumn<ElemType>* qCol = this->GetQuantizedColumn(j);
ColumnQuantizer<ElemType> q(ldNbits, qCol->lower, qCol->upper);
QWord qWord = qCol->bits[qWordIdx];
QWordVal qVal;
ElemType val;
if (this->GetNumBits() == 1)
{
ElemType val0 = q.valQ.Unquantize(0);
ElemType val1 = q.valQ.Unquantize(1);
qVal = (qWord >> offsetInQWord) & 1;
val = ValueQuantizer<ElemType>::Unquantize1(qVal != 0, val0, val1);
}
else
{
const QWordVal bitmask = q.valQ.QuanRangeEnd() - 1;
qVal = (qWord >> (offsetInQWord * this->GetNumBits())) & bitmask;
val = q.valQ.Unquantize(qVal);
}
fprintf(stderr, "%10d (%.10f) \t", (int) qVal, val);
}
fprintf(stderr, "\n");
}
if (curLocation == CurrentDataLocation::GPU)
{
m_quantizedData->_transferToDevice(orgdevice, false, false);
}
}
// Explicit instantiation
template class QuantizedMatrix<float>;
template class QuantizedMatrix<double>;
} } }