Remove LearnableParameterQuantized and MEL command to quantize a node

2016-06-27 09:23:56 -07:00 · 2016-06-27 09:23:56 -07:00 · 7cfc3f358e
--- a/Source/CNTK/ModelEditLanguage.cpp
+++ b/Source/CNTK/ModelEditLanguage.cpp
@ -10,7 +10,6 @@
 #include "ModelEditLanguage.h"
 #include "ConvolutionalNodes.h"
 #include "InputAndParamNodes.h"
-#include "Quantizers.h"
 #include <map>

 namespace Microsoft { namespace MSR { namespace CNTK {
@ -607,63 +606,6 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
            fprintf(stderr, "Revise node %ls using parameter file %s\n", pNodes->NodeName().c_str(), paramPath.c_str());
        }
    }
-    else if (EqualInsensitive(name, "Quantize"))
-    {
-        int numFixedParams = 1;
-        int numOptParams = 1;
-        // Default quantizer is short, symmetric
-        // extraBits decreases the quantization normalizer to prevent integer overflow during BLAS routines.
-        // Higher extraBits will decrease precision of quantization, but will make BLAS routines less prone to overflow.
-        // For quantization with shorts, recommended value of extraBits is 1-3.
-
-        //TODO: add regex pattern for the node name
-        if (params.size() > numFixedParams + numOptParams || params.size() < numFixedParams)
-            RuntimeError("Invalid number of parameters. Valid parameters: Quantize(nodeName, [extrabits=[0-5]])).");
-
-        std::string nodeName = params[0];
-        int extraBits = 0;
-        std::string propName, value;
-        if (OptionalParameter(params[params.size() - 1], propName, value))
-        {
-            try
-            {
-                extraBits = std::stoi(value);
-            }
-            catch (std::logic_error&)
-            {
-                InvalidArgument("Invalid optional parameter %s, valid value range for extrabits is [0-5]", propName.c_str());
-            }
-
-            if (!EqualInsensitive(propName, "extrabits") || extraBits < 0 || extraBits > 5)
-            {
-                InvalidArgument("Invalid optional parameter %s, valid optional parameters : extrabits = [0 - 5]", propName.c_str()); 
-            }
-        }
-        NetNdl<ElemType>* netNdl;
-        vector<ComputationNodeBasePtr> nodes = FindSymbols(nodeName, netNdl);
-
-        for (auto& pNode : nodes)
-        {
-            if (pNode->OperationName() != LearnableParameter<ElemType>::TypeName())
-            {
-                fprintf(stderr, "WARNING: you want to quantize the parameter of node (%ls), but it is not a learnable parameter (it is a %ls node). Skipping this node\n",
-                    pNode->NodeName().c_str(), pNode->OperationName().c_str());
-                continue;
-            }
-            auto pParamNode = std::dynamic_pointer_cast<LearnableParameter<ElemType>>(pNode);
-
-            //shared_ptr<LearnableParameter<short>> pLearnaParamShort(new LearnableParameter<short>(-1, L"Some_Name"));
-
-            wstring quantizedNodeName = pNode->NodeName() + L"_quantized";
-            // Quantization to <short> is the only currently supported
-            shared_ptr<SymmetricQuantizer<ElemType, short>> quantizer(new SymmetricQuantizer<ElemType, short>(pParamNode->Value().Data(), pParamNode->Value().GetNumElements(), extraBits));
-
-            shared_ptr<LearnableParameterQuantized<ElemType, short>> pParamNodeQuant(new LearnableParameterQuantized<ElemType, short>(pParamNode->Value(), pNode->GetDeviceId(), quantizedNodeName, quantizer));
-            pNode->CopyTo(pParamNodeQuant, quantizedNodeName, CopyNodeFlags::copyNodeValue);
-
-            fprintf(stderr, "Quantize node %ls\n", pNode->NodeName().c_str());
-        }
-    }
    else
    {
        RuntimeError("Unknown Editor function %s", name.c_str());
--- a/Source/ComputationNetworkLib/ComputationNode.cpp
+++ b/Source/ComputationNetworkLib/ComputationNode.cpp
@ -806,7 +806,6 @@ template <> map<size_t, map<size_t, shared_ptr<DoubleMatrix>>> ComputationNode<d
 // instantiate the core class templates
 // -----------------------------------------------------------------------

-//template class ComputationNode<short>;
 template class ComputationNode<float>;
 template class ComputationNode<double>;

--- a/Source/ComputationNetworkLib/InputAndParamNodes.cpp
+++ b/Source/ComputationNetworkLib/InputAndParamNodes.cpp
@ -74,7 +74,7 @@ LearnableParameter<ElemType>::LearnableParameter(const ScriptableObjects::IConfi
 // initialize with random numbers
 // if 'initOnCPUOnly' then always init on CPU, making initialization consistent across both (for testing)
 template <class ElemType>
-/*virtual*/ void LearnableParameter<ElemType>::InitRandom(const bool uniformInit,
+void LearnableParameter<ElemType>::InitRandom(const bool uniformInit,
                                                const unsigned long randomSeed,
                                                const ElemType initValueScale,
                                                bool initOnCPUOnly)
@ -162,6 +162,7 @@ void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& ar
    VerifyDataSize(Value());      // sanity check
 }

+// TODO: Move this error check into MEL (the caller), since this is called only from that one place.
 template <class ElemType>
 void LearnableParameter<ElemType>::ReviseFromFile(const std::wstring& reviseFromFilePath)
 {
@ -305,4 +306,7 @@ template <class ElemType>
    SetLearningRateMultiplier(0);
 }

+template class LearnableParameter<float>;
+template class LearnableParameter<double>;
+
 }}}
--- a/Source/ComputationNetworkLib/InputAndParamNodes.h
+++ b/Source/ComputationNetworkLib/InputAndParamNodes.h
@ -5,7 +5,6 @@
 #pragma once

 #include "Basics.h"
-#include "Quantizers.h"
 #include "ComputationNode.h"
 #include "ScriptableObjects.h"
 #include "TensorShape.h"
@ -28,6 +27,7 @@ class LearnableParameter : public ComputationNode<ElemType>, public NumInputs<0>
    static const std::wstring TypeName() { return L"LearnableParameter"; }

    void InitShape(const TensorShape& shape);
+
    // helper to initialize from a matrix read from a text file or a string literal
    void InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols);

@ -56,11 +56,10 @@ public:
    void InitRandom(const bool uniformInit, const unsigned long randomSeed, const ElemType initValueScale, bool initOnCPUOnly);

    // initialize by reading a matrix from a text file
-    virtual void InitFromFile(const std::wstring& initFromFilePath);
+    void InitFromFile(const std::wstring& initFromFilePath);

    // reload parameters from file
    // This is called from MEL.
-    // TODO: Move this error check there, since this is called only from one place.
    void ReviseFromFile(const std::wstring& reviseFromFilePath);

    virtual void Save(File& fstream) const override;
@ -85,37 +84,6 @@ public:
    virtual void FreezeParameters() override; // from IFreezable
 };

-// -----------------------------------------------------------------------
-// LearnableParameterQuantized (/*no input*/)
-// Represents quantized weight matrices and biases
-// This node is for inference only and should not be used during training
-// Expected workflow: 
-//    (1) Train a model with LearnableParameter
-//    (2) To prepare the model for runtime, use BS to convert desired LearnableParameter nodes to LearnableParameterQuantized
-// TODO: add -Node to the class name
-// -----------------------------------------------------------------------
-template <class ElemType, class QuantizedType>
-class LearnableParameterQuantized : public LearnableParameter<ElemType>
-{
-public:
-    LearnableParameterQuantized(const Matrix<ElemType>& learnableParameterValue, DEVICEID_TYPE deviceId, std::wstring nodeName, std::shared_ptr<IQuantizerBase<ElemType, QuantizedType>> quantizer) :
-        LearnableParameter<ElemType>(deviceId, nodeName)
-    {
-        // create a temp array for demonstration before we figure out how to store quantized matrix
-        QuantizedType* quantizedData = new QuantizedType[learnableParameterValue.GetNumElements()];
-        quantizer->Quantize(learnableParameterValue.Data(), quantizedData, learnableParameterValue.GetNumElements());
-
-        delete[] quantizedData;
-    }
-
-    virtual void InitFromFile(const std::wstring& initFromFilePath) override;
-
-    virtual void Save(File& fstream) const override;
-    virtual void Load(File& fstream, size_t modelVersion) override;
-
-    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
-};
-
 // -----------------------------------------------------------------------
 // DynamicAxisNode (/*no input*/)
 // This is a holder for MBLayout objects shared across inputs.
--- a/Source/Math/GPUSparseMatrix.cu
+++ b/Source/Math/GPUSparseMatrix.cu
@ -2689,7 +2689,6 @@ template void GPUSparseMatrix<char>::CopyToCPUSparseMatrix(CPUSparseMatrix<char>
 template void GPUSparseMatrix<char>::ChangeDeviceTo(int);
 template void GPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
 template void GPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
-template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
 template void GPUSparseMatrix<char>::Reset();
 template GPUSPARSE_INDEX_TYPE GPUSparseMatrix<char>::SecondaryIndexValueAt(size_t) const;
 template GPUSparseMatrix<char>::~GPUSparseMatrix();
@ -2713,7 +2712,6 @@ template void GPUSparseMatrix<short>::CopyToCPUSparseMatrix(CPUSparseMatrix<shor
 template void GPUSparseMatrix<short>::ChangeDeviceTo(int);
 template void GPUSparseMatrix<short>::Resize(const size_t, const size_t, const size_t, const bool);
 template void GPUSparseMatrix<short>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
-template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);
 template void GPUSparseMatrix<short>::Reset();
 template GPUSPARSE_INDEX_TYPE GPUSparseMatrix<short>::SecondaryIndexValueAt(size_t) const;
 template GPUSparseMatrix<short>::~GPUSparseMatrix();
@ -2725,6 +2723,7 @@ template void GPUSparseMatrix<short>::ScaleAndAdd(short, GPUSparseMatrix<short>

 template GPUSparseMatrix<int>::GPUSparseMatrix(DEVICEID_TYPE, const MatrixFormat);
 template GPUSparseMatrix<int>::~GPUSparseMatrix();
+template void GPUSparseMatrix<int>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, const bool);

 template <class ElemType>
 MATH_API File& operator>>(File& stream, GPUSparseMatrix<ElemType>& us)
--- a/Source/Math/Matrix.cpp
+++ b/Source/Math/Matrix.cpp
@ -1144,7 +1144,7 @@ template <>
 /*static*/ short Matrix<short>::MakeNan(size_t)
 {
    return 0;
-}
+} // (needed for completeness)

 template <class ElemType>
 void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemType val)
--- a/Source/Math/Quantizers.h
+++ b/Source/Math/Quantizers.h
@ -4,25 +4,28 @@
 //
 #pragma once

-#pragma warning(disable : 4127) // conditional expression is constant
-
 namespace Microsoft { namespace MSR { namespace CNTK {

-// RawType - input type to the quantizer
+// RawType - input type to the quantizer. Currently CNTK supports float or double as RawType.
 // QuantizedType - output type of the quantizer
 template <class RawType, class QuantizedType>
-class IQuantizerBase 
+class QuantizerBase 
 {
 public:
    virtual void Quantize(const RawType* input, QuantizedType* output, size_t arraySize) = 0;
    virtual void Dequantize(const QuantizedType* input, RawType* output, size_t arraySize) = 0;

 protected:
-    static int rangeMax;
+    static const int rangeMax = std::numeric_limits<short>::max(); // declared unqualified: a qualified name is ill-formed inside the class body
 };

+// Symmetric quantizer. 
+// Quantization is achieved by 
+//    1. Finding the absolute max of values to be quantized.
+//    2. Adjusting the absolute max with the extraBits parameter.
+//    3. Scaling all values in the collection to be within the symmetric range of the QuantizedType
 template <class RawType, class QuantizedType>
-class SymmetricQuantizer : public IQuantizerBase<RawType, QuantizedType>
+class SymmetricQuantizer : public QuantizerBase<RawType, QuantizedType>
 {
    RawType m_quantizer;
    RawType m_inverseQuantizer;
@ -32,12 +35,10 @@ public:
    // extraBits decreases the quantization normalizer to prevent integer overflow during BLAS routines.
    //     Higher extraBits will decrease precision of quantization, but will make BLAS routines less prone to overflow.
    //     For quantization with shorts, recommended value of extraBits is 1-3.
-    SymmetricQuantizer(RawType* elements, size_t elementsSize, size_t extraBits)
+    // Initializes the internal quantizer from a collection of RawType values; it can then be applied to collections with a similar range.
+    SymmetricQuantizer(const RawType* elements, size_t elementsSize, size_t extraBits)
    {
-        if (elementsSize == 0)
-        {
-            LogicError("The sequence to be quantized is empty.");
-        }
-        m_absMax = FindAbsMax(elements, elementsSize);
-        SymmetricQuantizer(m_absMax, extraBits);
+        const RawType absMax = FindAbsMax(elements, elementsSize);
+        *this = SymmetricQuantizer(absMax, extraBits); // a bare constructor call would only build and discard a temporary
+        m_absMax = absMax;                             // keep the measured range regardless of what the other constructor stores
    }
@ -55,6 +56,7 @@ public:
        m_inverseQuantizer = 1 / m_quantizer;
    }

+    // Perform quantization of the input collection, put result into pre-allocated output collection
    virtual void Quantize(const RawType* input, QuantizedType* output, size_t inputSize)
    {
        for (size_t i = 0; i < inputSize; i++)
@ -66,14 +68,12 @@ public:
        }
    }

+    // Accept quantized collection as input, put de-quantization result into pre-allocated output collection.
    virtual void Dequantize(const QuantizedType* input, RawType* output, size_t inputSize)
    {
        for (size_t i = 0; i < inputSize; i++)
        {
            output[i] = (RawType)(input[i] * m_inverseQuantizer);
-#ifdef _DEBUG
-            assert(abs(output[i]) <= m_absMax);
-#endif
        }
    }

@ -81,8 +81,8 @@ private:
    // Find absolute maximum value
-    RawType FindAbsMax(RawType* elements, size_t elementsSize)
+    RawType FindAbsMax(const RawType* elements, size_t elementsSize)
    {
-        // in constructor we asserted that arraySize > 0
-        RawType maxElem, minElem = elements[0];
+        RawType maxElem = std::numeric_limits<RawType>::lowest(); // lowest(), not min(): for floating-point types min() is the smallest positive value
+        RawType minElem = std::numeric_limits<RawType>::max();
        for (size_t i = 0; i < elementsSize; i++)
        {
            maxElem = std::max(maxElem, elements[i]);
@ -93,7 +93,4 @@ private:
    }
 };

-int IQuantizerBase<float, short>::rangeMax = SHRT_MAX;
-int IQuantizerBase<double, short>::rangeMax = SHRT_MAX;
-
 }}}