Remove LearnableParameterQuantized and MEL command to quantize a node
This commit is contained in:
Родитель
90079c6fa3
Коммит
7cfc3f358e
|
@ -10,7 +10,6 @@
|
|||
#include "ModelEditLanguage.h"
|
||||
#include "ConvolutionalNodes.h"
|
||||
#include "InputAndParamNodes.h"
|
||||
#include "Quantizers.h"
|
||||
#include <map>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
@ -607,63 +606,6 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
fprintf(stderr, "Revise node %ls using parameter file %s\n", pNodes->NodeName().c_str(), paramPath.c_str());
|
||||
}
|
||||
}
|
||||
else if (EqualInsensitive(name, "Quantize"))
|
||||
{
|
||||
int numFixedParams = 1;
|
||||
int numOptParams = 1;
|
||||
// Default quantizer is short, symmetric
|
||||
// extraBits decreases the quantization normalizer to prevent integer overflow during BLAS routines.
|
||||
// Higher extraBits will decrease precision of quantization, but will make BLAS routines less prone to overflow.
|
||||
// For quantization with shorts, recommended value of extraBits is 1-3.
|
||||
|
||||
//TODO: add regex pattern for the node name
|
||||
if (params.size() > numFixedParams + numOptParams || params.size() < numFixedParams)
|
||||
RuntimeError("Invalid number of parameters. Valid parameters: Quantize(nodeName, [extrabits=[0-5]])).");
|
||||
|
||||
std::string nodeName = params[0];
|
||||
int extraBits = 0;
|
||||
std::string propName, value;
|
||||
if (OptionalParameter(params[params.size() - 1], propName, value))
|
||||
{
|
||||
try
|
||||
{
|
||||
extraBits = std::stoi(value);
|
||||
}
|
||||
catch (std::logic_error&)
|
||||
{
|
||||
InvalidArgument("Invalid optional parameter %s, valid value range for extrabits is [0-5]", propName.c_str());
|
||||
}
|
||||
|
||||
if (!EqualInsensitive(propName, "extrabits") || extraBits < 0 || extraBits > 5)
|
||||
{
|
||||
InvalidArgument("Invalid optional parameter %s, valid optional parameters : extrabits = [0 - 5]", propName.c_str());
|
||||
}
|
||||
}
|
||||
NetNdl<ElemType>* netNdl;
|
||||
vector<ComputationNodeBasePtr> nodes = FindSymbols(nodeName, netNdl);
|
||||
|
||||
for (auto& pNode : nodes)
|
||||
{
|
||||
if (pNode->OperationName() != LearnableParameter<ElemType>::TypeName())
|
||||
{
|
||||
fprintf(stderr, "WARNING: you want to quantize the parameter of node (%ls), but it is not a learnable parameter (it is a %ls node). Skipping this node\n",
|
||||
pNode->NodeName().c_str(), pNode->OperationName().c_str());
|
||||
continue;
|
||||
}
|
||||
auto pParamNode = std::dynamic_pointer_cast<LearnableParameter<ElemType>>(pNode);
|
||||
|
||||
//shared_ptr<LearnableParameter<short>> pLearnaParamShort(new LearnableParameter<short>(-1, L"Some_Name"));
|
||||
|
||||
wstring quantizedNodeName = pNode->NodeName() + L"_quantized";
|
||||
// Quantization to <short> is the only mode currently supported
|
||||
shared_ptr<SymmetricQuantizer<ElemType, short>> quantizer(new SymmetricQuantizer<ElemType, short>(pParamNode->Value().Data(), pParamNode->Value().GetNumElements(), extraBits));
|
||||
|
||||
shared_ptr<LearnableParameterQuantized<ElemType, short>> pParamNodeQuant(new LearnableParameterQuantized<ElemType, short>(pParamNode->Value(), pNode->GetDeviceId(), quantizedNodeName, quantizer));
|
||||
pNode->CopyTo(pParamNodeQuant, quantizedNodeName, CopyNodeFlags::copyNodeValue);
|
||||
|
||||
fprintf(stderr, "Quantize node %ls\n", pNode->NodeName().c_str());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
RuntimeError("Unknown Editor function %s", name.c_str());
|
||||
|
|
|
@ -806,7 +806,6 @@ template <> map<size_t, map<size_t, shared_ptr<DoubleMatrix>>> ComputationNode<d
|
|||
// instantiate the core class templates
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
//template class ComputationNode<short>;
|
||||
template class ComputationNode<float>;
|
||||
template class ComputationNode<double>;
|
||||
|
||||
|
|
|
@ -74,7 +74,7 @@ LearnableParameter<ElemType>::LearnableParameter(const ScriptableObjects::IConfi
|
|||
// initialize with random numbers
|
||||
// if 'initOnCPUOnly' then always init on CPU, making initialization consistent across both (for testing)
|
||||
template <class ElemType>
|
||||
/*virtual*/ void LearnableParameter<ElemType>::InitRandom(const bool uniformInit,
|
||||
void LearnableParameter<ElemType>::InitRandom(const bool uniformInit,
|
||||
const unsigned long randomSeed,
|
||||
const ElemType initValueScale,
|
||||
bool initOnCPUOnly)
|
||||
|
@ -162,6 +162,7 @@ void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& ar
|
|||
VerifyDataSize(Value()); // sanity check
|
||||
}
|
||||
|
||||
// TODO: Move this error check there, since this is called only from one place.
|
||||
template <class ElemType>
|
||||
void LearnableParameter<ElemType>::ReviseFromFile(const std::wstring& reviseFromFilePath)
|
||||
{
|
||||
|
@ -305,4 +306,7 @@ template <class ElemType>
|
|||
SetLearningRateMultiplier(0);
|
||||
}
|
||||
|
||||
template class LearnableParameter<float>;
|
||||
template class LearnableParameter<double>;
|
||||
|
||||
}}}
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#pragma once
|
||||
|
||||
#include "Basics.h"
|
||||
#include "Quantizers.h"
|
||||
#include "ComputationNode.h"
|
||||
#include "ScriptableObjects.h"
|
||||
#include "TensorShape.h"
|
||||
|
@ -28,6 +27,7 @@ class LearnableParameter : public ComputationNode<ElemType>, public NumInputs<0>
|
|||
static const std::wstring TypeName() { return L"LearnableParameter"; }
|
||||
|
||||
void InitShape(const TensorShape& shape);
|
||||
|
||||
// helper to initialize from a matrix read from a text file or a string literal
|
||||
void InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols);
|
||||
|
||||
|
@ -56,11 +56,10 @@ public:
|
|||
void InitRandom(const bool uniformInit, const unsigned long randomSeed, const ElemType initValueScale, bool initOnCPUOnly);
|
||||
|
||||
// initialize by reading a matrix from a text file
|
||||
virtual void InitFromFile(const std::wstring& initFromFilePath);
|
||||
void InitFromFile(const std::wstring& initFromFilePath);
|
||||
|
||||
// reload parameters from file
|
||||
// This is called from MEL.
|
||||
// TODO: Move this error check there, since this is called only from one place.
|
||||
void ReviseFromFile(const std::wstring& reviseFromFilePath);
|
||||
|
||||
virtual void Save(File& fstream) const override;
|
||||
|
@ -85,37 +84,6 @@ public:
|
|||
virtual void FreezeParameters() override; // from IFreezable
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// LearnableParameterQuantized (/*no input*/)
|
||||
// Represents quantized weight matrices and biases
|
||||
// This node is for inference only and should not be used during training
|
||||
// Expected workflow:
|
||||
// (1) Train a model with LearnableParameter
|
||||
// (2) To prepare the model for runtime, use BS to convert desired LearnableParameter nodes to LearnableParameterQuantized
|
||||
// TODO: add -Node to the class name
|
||||
// -----------------------------------------------------------------------
|
||||
// Parameter node derived from LearnableParameter<ElemType> that takes a quantizer producing QuantizedType values.
template <class ElemType, class QuantizedType>
|
||||
class LearnableParameterQuantized : public LearnableParameter<ElemType>
|
||||
{
|
||||
public:
|
||||
// Constructs the base node with (deviceId, nodeName), then runs the quantizer over
// learnableParameterValue into a temporary buffer that is released before returning.
// The quantized values are not stored anywhere by this constructor.
LearnableParameterQuantized(const Matrix<ElemType>& learnableParameterValue, DEVICEID_TYPE deviceId, std::wstring nodeName, std::shared_ptr<IQuantizerBase<ElemType, QuantizedType>> quantizer) :
|
||||
LearnableParameter<ElemType>(deviceId, nodeName)
|
||||
{
|
||||
// create a temp array for demonstration before we figure out how to store quantized matrix
|
||||
// NOTE(review): raw new[]/delete[] pair; if Quantize() throws, the buffer is not freed.
// An owning container (e.g. std::vector<QuantizedType>) would avoid that.
QuantizedType* quantizedData = new QuantizedType[learnableParameterValue.GetNumElements()];
|
||||
quantizer->Quantize(learnableParameterValue.Data(), quantizedData, learnableParameterValue.GetNumElements());
|
||||

||||
delete[] quantizedData;
|
||||
}
|
||||

||||
// Overrides declared here; their definitions are not visible in this view.
virtual void InitFromFile(const std::wstring& initFromFilePath) override;
|
||||

||||
virtual void Save(File& fstream) const override;
|
||||
virtual void Load(File& fstream, size_t modelVersion) override;
|
||||

||||
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DynamicAxisNode (/*no input*/)
|
||||
// This is a holder for MBLayout objects shared across inputs.
|
||||
|
|
|
@ -2689,7 +2689,6 @@ template void GPUSparseMatrix<char>::CopyToCPUSparseMatrix(CPUSparseMatrix<char>
|
|||
template void GPUSparseMatrix<char>::ChangeDeviceTo(int);
|
||||
template void GPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
|
||||
template void GPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
|
||||
template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
|
||||
template void GPUSparseMatrix<char>::Reset();
|
||||
template GPUSPARSE_INDEX_TYPE GPUSparseMatrix<char>::SecondaryIndexValueAt(size_t) const;
|
||||
template GPUSparseMatrix<char>::~GPUSparseMatrix();
|
||||
|
@ -2713,7 +2712,6 @@ template void GPUSparseMatrix<short>::CopyToCPUSparseMatrix(CPUSparseMatrix<shor
|
|||
template void GPUSparseMatrix<short>::ChangeDeviceTo(int);
|
||||
template void GPUSparseMatrix<short>::Resize(const size_t, const size_t, const size_t, const bool);
|
||||
template void GPUSparseMatrix<short>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
|
||||
template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
|
||||
template void GPUSparseMatrix<short>::Reset();
|
||||
template GPUSPARSE_INDEX_TYPE GPUSparseMatrix<short>::SecondaryIndexValueAt(size_t) const;
|
||||
template GPUSparseMatrix<short>::~GPUSparseMatrix();
|
||||
|
@ -2725,6 +2723,7 @@ template void GPUSparseMatrix<short>::ScaleAndAdd(short, GPUSparseMatrix<short>
|
|||
|
||||
template GPUSparseMatrix<int>::GPUSparseMatrix(DEVICEID_TYPE, const MatrixFormat);
|
||||
template GPUSparseMatrix<int>::~GPUSparseMatrix();
|
||||
template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
|
||||
|
||||
template <class ElemType>
|
||||
MATH_API File& operator>>(File& stream, GPUSparseMatrix<ElemType>& us)
|
||||
|
|
|
@ -1144,7 +1144,7 @@ template <>
|
|||
/*static*/ short Matrix<short>::MakeNan(size_t)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
} // (needed for completeness)
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemType val)
|
||||
|
|
|
@ -4,25 +4,28 @@
|
|||
//
|
||||
#pragma once
|
||||
|
||||
#pragma warning(disable : 4127) // conditional expression is constant
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// RawType - input type to the quantizer
|
||||
// RawType - input type to the quantizer. Currently CNTK supports float or double as RawType.
|
||||
// QuantizedType - output type of the quantizer
|
||||
template <class RawType, class QuantizedType>
|
||||
class IQuantizerBase
|
||||
class QuantizerBase
|
||||
{
|
||||
public:
|
||||
virtual void Quantize(const RawType* input, QuantizedType* output, size_t arraySize) = 0;
|
||||
virtual void Dequantize(const QuantizedType* input, RawType* output, size_t arraySize) = 0;
|
||||
|
||||
protected:
|
||||
static int rangeMax;
|
||||
static const int QuantizerBase<RawType, short>::rangeMax = std::numeric_limits<short>::max();
|
||||
};
|
||||
|
||||
// Symmetric quantizer.
|
||||
// Quantization is achieved by
|
||||
// 1. Finding the absolute max of values to be quantized.
|
||||
// 2. Adjusting the absolute max with extraBits parameters.
|
||||
// 3. Scaling all values in the collection to be within the symmetric range of the QuantizedType
|
||||
template <class RawType, class QuantizedType>
|
||||
class SymmetricQuantizer : public IQuantizerBase<RawType, QuantizedType>
|
||||
class SymmetricQuantizer : public QuantizerBase<RawType, QuantizedType>
|
||||
{
|
||||
RawType m_quantizer;
|
||||
RawType m_inverseQuantizer;
|
||||
|
@ -32,12 +35,10 @@ public:
|
|||
// extraBits decreases the quantization normalizer to prevent integer overflow during BLAS routines.
|
||||
// Higher extraBits will decrease precision of quantization, but will make BLAS routines less prone to overflow.
|
||||
// For quantization with shorts, recommended value of extraBits is 1-3.
|
||||
SymmetricQuantizer(RawType* elements, size_t elementsSize, size_t extraBits)
|
||||
// This constructor accepts a collection of RawType values to initialize the internal quantizer,
|
||||
// which can then be applied to collections with a range similar to the one it was initialized with.
|
||||
// Builds a quantizer from a sample collection.
// elements     - values scanned by FindAbsMax to obtain the absolute maximum
// elementsSize - number of values in 'elements'; zero is rejected through LogicError
// extraBits    - passed on to the (absMax, extraBits) constructor call below
SymmetricQuantizer(const RawType* elements, size_t elementsSize, size_t extraBits)
|
||||
{
|
||||
if (elementsSize == 0)
|
||||
{
|
||||
LogicError("The sequence to be quantized is empty.");
|
||||
}
|
||||
// Remember the absolute maximum of the sample collection.
m_absMax = FindAbsMax(elements, elementsSize);
|
||||
// NOTE(review): written as a statement inside the body, this constructs and immediately discards
// a temporary SymmetricQuantizer; it does not delegate and does not set any member of *this.
// Confirm whether a delegating constructor or a shared initialization helper was intended.
SymmetricQuantizer(m_absMax, extraBits);
|
||||
}
|
||||
|
@ -55,6 +56,7 @@ public:
|
|||
m_inverseQuantizer = 1 / m_quantizer;
|
||||
}
|
||||
|
||||
// Perform quantization of the input collection, put result into pre-allocated output collection
|
||||
virtual void Quantize(const RawType* input, QuantizedType* output, size_t inputSize)
|
||||
{
|
||||
for (size_t i = 0; i < inputSize; i++)
|
||||
|
@ -66,14 +68,12 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// Accept quantized collection as input, put de-quantization result into pre-allocated output collection.
|
||||
// For every i in [0, inputSize), writes input[i] * m_inverseQuantizer (cast to RawType) into output[i].
// input     - quantized values to convert back
// output    - destination array; must hold at least inputSize elements (it is indexed up to inputSize - 1)
// inputSize - number of elements to process
virtual void Dequantize(const QuantizedType* input, RawType* output, size_t inputSize)
|
||||
{
|
||||
for (size_t i = 0; i < inputSize; i++)
|
||||
{
|
||||
output[i] = (RawType)(input[i] * m_inverseQuantizer);
|
||||
// Debug-only sanity check: the de-quantized magnitude must not exceed m_absMax.
// NOTE(review): unqualified abs - confirm a floating-point overload is selected here
// (std::abs / fabs) so the value is not truncated to an integer before the comparison.
#ifdef _DEBUG
|
||||
assert(abs(output[i]) <= m_absMax);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -81,8 +81,8 @@ private:
|
|||
// Find absolute maximum value
|
||||
RawType FindAbsMax(RawType* elements, size_t elementsSize)
|
||||
{
|
||||
// in constructor we asserted that arraySize > 0
|
||||
RawType maxElem, minElem = elements[0];
|
||||
RawType maxElem = std::numeric_limits<float>::min();
|
||||
RawType minElem = std::numeric_limits<float>::max();
|
||||
for (size_t i = 0; i < elementsSize; i++)
|
||||
{
|
||||
maxElem = std::max(maxElem, elements[i]);
|
||||
|
@ -93,7 +93,4 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
int IQuantizerBase<float, short>::rangeMax = SHRT_MAX;
|
||||
int IQuantizerBase<double, short>::rangeMax = SHRT_MAX;
|
||||
|
||||
}}}
|
Загрузка…
Ссылка в новой задаче