Remove LearnableParameterQuantized and MEL command to quantize a node

This commit is contained in:
Vadim Mazalov 2016-06-27 09:23:56 -07:00
Parent 90079c6fa3
Commit 7cfc3f358e
8 changed files: 28 additions and 119 deletions

View file

@ -10,7 +10,6 @@
#include "ModelEditLanguage.h"
#include "ConvolutionalNodes.h"
#include "InputAndParamNodes.h"
#include "Quantizers.h"
#include <map>
namespace Microsoft { namespace MSR { namespace CNTK {
@ -607,63 +606,6 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
fprintf(stderr, "Revise node %ls using parameter file %s\n", pNodes->NodeName().c_str(), paramPath.c_str());
}
}
else if (EqualInsensitive(name, "Quantize"))
{
int numFixedParams = 1;
int numOptParams = 1;
// Default quantizer is short, symmetric
// extraBits decreases the quantization normalizer to prevent integer overflow during BLAS routines.
// Higher extraBits will decrease precision of quantization, but will make BLAS routines less prone to overflow.
// For quantization with shorts, recommended value of extraBits is 1-3.
//TODO: add regex pattern for the node name
if (params.size() > numFixedParams + numOptParams || params.size() < numFixedParams)
RuntimeError("Invalid number of parameters. Valid parameters: Quantize(nodeName, [extrabits=[0-5]])).");
std::string nodeName = params[0];
int extraBits = 0;
std::string propName, value;
if (OptionalParameter(params[params.size() - 1], propName, value))
{
try
{
extraBits = std::stoi(value);
}
catch (std::logic_error&)
{
InvalidArgument("Invalid optional parameter %s, valid value range for extrabits is [0-5]", propName.c_str());
}
if (!EqualInsensitive(propName, "extrabits") || extraBits < 0 || extraBits > 5)
{
InvalidArgument("Invalid optional parameter %s, valid optional parameters : extrabits = [0 - 5]", propName.c_str());
}
}
NetNdl<ElemType>* netNdl;
vector<ComputationNodeBasePtr> nodes = FindSymbols(nodeName, netNdl);
for (auto& pNode : nodes)
{
if (pNode->OperationName() != LearnableParameter<ElemType>::TypeName())
{
fprintf(stderr, "WARNING: you want to quantize the parameter of node (%ls), but it is not a learnable parameter (it is a %ls node). Skipping this node\n",
pNode->NodeName().c_str(), pNode->OperationName().c_str());
continue;
}
auto pParamNode = std::dynamic_pointer_cast<LearnableParameter<ElemType>>(pNode);
//shared_ptr<LearnableParameter<short>> pLearnaParamShort(new LearnableParameter<short>(-1, L"Some_Name"));
wstring quantizedNodeName = pNode->NodeName() + L"_quantized";
// Quantization to <short> is the only currently supported
shared_ptr<SymmetricQuantizer<ElemType, short>> quantizer(new SymmetricQuantizer<ElemType, short>(pParamNode->Value().Data(), pParamNode->Value().GetNumElements(), extraBits));
shared_ptr<LearnableParameterQuantized<ElemType, short>> pParamNodeQuant(new LearnableParameterQuantized<ElemType, short>(pParamNode->Value(), pNode->GetDeviceId(), quantizedNodeName, quantizer));
pNode->CopyTo(pParamNodeQuant, quantizedNodeName, CopyNodeFlags::copyNodeValue);
fprintf(stderr, "Quantize node %ls\n", pNode->NodeName().c_str());
}
}
else
{
RuntimeError("Unknown Editor function %s", name.c_str());

View file

@ -806,7 +806,6 @@ template <> map<size_t, map<size_t, shared_ptr<DoubleMatrix>>> ComputationNode<d
// instantiate the core class templates
// -----------------------------------------------------------------------
//template class ComputationNode<short>;
template class ComputationNode<float>;
template class ComputationNode<double>;

View file

@ -74,7 +74,7 @@ LearnableParameter<ElemType>::LearnableParameter(const ScriptableObjects::IConfi
// initialize with random numbers
// if 'initOnCPUOnly' then always init on CPU, making initialization consistent across both (for testing)
template <class ElemType>
/*virtual*/ void LearnableParameter<ElemType>::InitRandom(const bool uniformInit,
void LearnableParameter<ElemType>::InitRandom(const bool uniformInit,
const unsigned long randomSeed,
const ElemType initValueScale,
bool initOnCPUOnly)
@ -162,6 +162,7 @@ void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& ar
VerifyDataSize(Value()); // sanity check
}
// TODO: Move this error check there, since this is called only from one place.
template <class ElemType>
void LearnableParameter<ElemType>::ReviseFromFile(const std::wstring& reviseFromFilePath)
{
@ -305,4 +306,7 @@ template <class ElemType>
SetLearningRateMultiplier(0);
}
template class LearnableParameter<float>;
template class LearnableParameter<double>;
}}}

View file

@ -5,7 +5,6 @@
#pragma once
#include "Basics.h"
#include "Quantizers.h"
#include "ComputationNode.h"
#include "ScriptableObjects.h"
#include "TensorShape.h"
@ -28,6 +27,7 @@ class LearnableParameter : public ComputationNode<ElemType>, public NumInputs<0>
static const std::wstring TypeName() { return L"LearnableParameter"; }
void InitShape(const TensorShape& shape);
// helper to initialize from a matrix read from a text file or a string literal
void InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols);
@ -56,11 +56,10 @@ public:
void InitRandom(const bool uniformInit, const unsigned long randomSeed, const ElemType initValueScale, bool initOnCPUOnly);
// initialize by reading a matrix from a text file
virtual void InitFromFile(const std::wstring& initFromFilePath);
void InitFromFile(const std::wstring& initFromFilePath);
// reload parameters from file
// This is called from MEL.
// TODO: Move this error check there, since this is called only from one place.
void ReviseFromFile(const std::wstring& reviseFromFilePath);
virtual void Save(File& fstream) const override;
@ -85,37 +84,6 @@ public:
virtual void FreezeParameters() override; // from IFreezable
};
// -----------------------------------------------------------------------
// LearnableParameterQuantized (/*no input*/)
// Represents quantized weight matrices and biases
// This node is for inference only and should not be used during training
// Expected workflow:
// (1) Train a model with LearnableParameter
// (2) To prepare the model for runtime, use BS to convert desired LearnableParameter nodes to LearnableParameterQuantized
// TODO: add -Node to the class name
// -----------------------------------------------------------------------
template <class ElemType, class QuantizedType>
class LearnableParameterQuantized : public LearnableParameter<ElemType>
{
public:
    // Constructs an inference-only quantized parameter node from a full-precision one.
    //   learnableParameterValue: the full-precision weight matrix to be quantized
    //   deviceId:                device on which the underlying LearnableParameter lives
    //   nodeName:                name to give the new node
    //   quantizer:               scheme mapping ElemType values to QuantizedType
    LearnableParameterQuantized(const Matrix<ElemType>& learnableParameterValue, DEVICEID_TYPE deviceId, std::wstring nodeName, std::shared_ptr<IQuantizerBase<ElemType, QuantizedType>> quantizer) :
        LearnableParameter<ElemType>(deviceId, nodeName)
    {
        // Temporary buffer for demonstration until we figure out how to store the quantized matrix.
        // std::vector replaces the previous raw new[]/delete[] pair: if Quantize() throws, the
        // buffer is still released (RAII), whereas the raw array would have leaked.
        std::vector<QuantizedType> quantizedData(learnableParameterValue.GetNumElements());
        quantizer->Quantize(learnableParameterValue.Data(), quantizedData.data(), learnableParameterValue.GetNumElements());
    }
    virtual void InitFromFile(const std::wstring& initFromFilePath) override;
    virtual void Save(File& fstream) const override;
    virtual void Load(File& fstream, size_t modelVersion) override;
    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
};
// -----------------------------------------------------------------------
// DynamicAxisNode (/*no input*/)
// This is a holder for MBLayout objects shared across inputs.

View file

@ -2689,7 +2689,6 @@ template void GPUSparseMatrix<char>::CopyToCPUSparseMatrix(CPUSparseMatrix<char>
template void GPUSparseMatrix<char>::ChangeDeviceTo(int);
template void GPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
template void GPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
template void GPUSparseMatrix<char>::Reset();
template GPUSPARSE_INDEX_TYPE GPUSparseMatrix<char>::SecondaryIndexValueAt(size_t) const;
template GPUSparseMatrix<char>::~GPUSparseMatrix();
@ -2713,7 +2712,6 @@ template void GPUSparseMatrix<short>::CopyToCPUSparseMatrix(CPUSparseMatrix<shor
template void GPUSparseMatrix<short>::ChangeDeviceTo(int);
template void GPUSparseMatrix<short>::Resize(const size_t, const size_t, const size_t, const bool);
template void GPUSparseMatrix<short>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
template void GPUSparseMatrix<short>::Reset();
template GPUSPARSE_INDEX_TYPE GPUSparseMatrix<short>::SecondaryIndexValueAt(size_t) const;
template GPUSparseMatrix<short>::~GPUSparseMatrix();
@ -2725,6 +2723,7 @@ template void GPUSparseMatrix<short>::ScaleAndAdd(short, GPUSparseMatrix<short>
template GPUSparseMatrix<int>::GPUSparseMatrix(DEVICEID_TYPE, const MatrixFormat);
template GPUSparseMatrix<int>::~GPUSparseMatrix();
template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
template <class ElemType>
MATH_API File& operator>>(File& stream, GPUSparseMatrix<ElemType>& us)

View file

@ -1144,7 +1144,7 @@ template <>
/*static*/ short Matrix<short>::MakeNan(size_t)
{
return 0;
}
} // (needed for completeness)
template <class ElemType>
void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemType val)

View file

@ -4,25 +4,28 @@
//
#pragma once
#pragma warning(disable : 4127) // conditional expression is constant
namespace Microsoft { namespace MSR { namespace CNTK {
// RawType - input type to the quantizer
// RawType - input type to the quantizer. Currently CNTK supports float or double as RawType.
// QuantizedType - output type of the quantizer
template <class RawType, class QuantizedType>
class IQuantizerBase
class QuantizerBase
{
public:
virtual void Quantize(const RawType* input, QuantizedType* output, size_t arraySize) = 0;
virtual void Dequantize(const QuantizedType* input, RawType* output, size_t arraySize) = 0;
protected:
static int rangeMax;
static const int QuantizerBase<RawType, short>::rangeMax = std::numeric_limits<short>::max();
};
// Symmetric quantizer.
// Quantization is achieved by
// 1. Finding the absolute max of values to be quantized.
// 2. Adjusting the absolute max with extraBits parameters.
// 3. Scaling all values in the collection to be within the symmetric range of the QuantizedType
template <class RawType, class QuantizedType>
class SymmetricQuantizer : public IQuantizerBase<RawType, QuantizedType>
class SymmetricQuantizer : public QuantizerBase<RawType, QuantizedType>
{
RawType m_quantizer;
RawType m_inverseQuantizer;
@ -32,12 +35,10 @@ public:
// extraBits decreases the quantization normalizer to prevent integer overflow during BLAS routines.
// Higher extraBits will decrease precision of quantization, but will make BLAS routines less prone to overflow.
// For quantization with shorts, recommended value of extraBits is 1-3.
SymmetricQuantizer(RawType* elements, size_t elementsSize, size_t extraBits)
// This constructor accepts the collection of RawType to initialize internal quantizer
// and then apply this quantizer to collections with similar range as the one it was initialized with.
SymmetricQuantizer(const RawType* elements, size_t elementsSize, size_t extraBits)
{
if (elementsSize == 0)
{
LogicError("The sequence to be quantized is empty.");
}
m_absMax = FindAbsMax(elements, elementsSize);
SymmetricQuantizer(m_absMax, extraBits);
}
@ -55,6 +56,7 @@ public:
m_inverseQuantizer = 1 / m_quantizer;
}
// Perform quantization of the input collection, put result into pre-allocated output collection
virtual void Quantize(const RawType* input, QuantizedType* output, size_t inputSize)
{
for (size_t i = 0; i < inputSize; i++)
@ -66,14 +68,12 @@ public:
}
}
// Accept quantized collection as input, put de-quantization result into pre-allocated output collection.
virtual void Dequantize(const QuantizedType* input, RawType* output, size_t inputSize)
{
for (size_t i = 0; i < inputSize; i++)
{
output[i] = (RawType)(input[i] * m_inverseQuantizer);
#ifdef _DEBUG
assert(abs(output[i]) <= m_absMax);
#endif
}
}
@ -81,8 +81,8 @@ private:
// Find absolute maximum value
RawType FindAbsMax(RawType* elements, size_t elementsSize)
{
// in constructor we asserted that arraySize > 0
RawType maxElem, minElem = elements[0];
RawType maxElem = std::numeric_limits<float>::min();
RawType minElem = std::numeric_limits<float>::max();
for (size_t i = 0; i < elementsSize; i++)
{
maxElem = std::max(maxElem, elements[i]);
@ -93,7 +93,4 @@ private:
}
};
int IQuantizerBase<float, short>::rangeMax = SHRT_MAX;
int IQuantizerBase<double, short>::rangeMax = SHRT_MAX;
}}}