From 279a466b5f58ea0f036b60f0c166d8c11429a3fe Mon Sep 17 00:00:00 2001
From: Bowen Bao
Date: Thu, 26 Jul 2018 14:46:06 -0700
Subject: [PATCH] Squash of the following:

1. Fix gather and prelu argument order issue.
2. Add typenameToTypeProto map initialization.
3. Add CNTK native default lotus logger.
4. Add CNTK unsqueeze op.
---
 .../API/CNTKLibraryInternals.h                   |  1 +
 Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj |  1 +
 .../CNTKv2LibraryDll.vcxproj.filters             |  3 +
 Source/CNTKv2LibraryDll/Function.cpp             | 97 ++++++++++++++++++-
 Source/CNTKv2LibraryDll/Logger.h                 | 39 ++++++++
 .../proto/onnx/CNTKToONNX.cpp                    |  7 ++
 Source/CNTKv2LibraryDll/proto/onnx/ONNX.cpp      | 51 ++++++++++
 Source/CNTKv2LibraryDll/proto/onnx/ONNX.h        |  3 +
 .../proto/onnx/ONNXToCNTK.cpp                    |  6 ++
 .../CNTKv2LibraryDll/proto/onnx/Operators.cpp    |  6 +-
 bindings/python/cntk/tests/onnx_op_test.py       | 29 ++++--
 bindings/python/onnx_cntk/backend.py             |  2 +-
 12 files changed, 232 insertions(+), 13 deletions(-)
 create mode 100644 Source/CNTKv2LibraryDll/Logger.h

diff --git a/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h b/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h
index b08b7c400..e0001c4e7 100644
--- a/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h
+++ b/Source/CNTKv2LibraryDll/API/CNTKLibraryInternals.h
@@ -262,6 +262,7 @@ namespace CNTK
         const std::vector<bool>& autoPadding, const NDShape& dilation, size_t maxTempMemSizeInSamples, const std::wstring& name = L"");
     CNTK_API FunctionPtr MatMul(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name = L"");
     CNTK_API FunctionPtr Gemm(const Variable& operandA, const Variable& operandB, const Variable& operandC, float alpha = 1.0, float beta = 1.0, bool transA = false, bool transB = false, const std::wstring& name = L"");
+    CNTK_API FunctionPtr Unsqueeze(const Variable& operand, const std::vector<Axis>& axes, const std::wstring& name = L"");

     // This is meant for debugging purposes only and is very likely to be deprecated in the future.
     CNTK_API void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile);
diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj
index 43b5a8861..40433e37e 100644
--- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj
+++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj
@@ -171,6 +171,7 @@
+    <ClInclude Include="Logger.h" />
diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters
index 39e9107fa..2960c42b5 100644
--- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters
+++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters
@@ -297,6 +297,9 @@
       <Filter>proto\onnx\onnx_repo\onnx\common</Filter>
     </ClInclude>
+    <ClInclude Include="Logger.h">
+      <Filter>proto\onnx</Filter>
+    </ClInclude>
diff --git a/Source/CNTKv2LibraryDll/Function.cpp b/Source/CNTKv2LibraryDll/Function.cpp
index 2038d091a..d020af0c5 100644
--- a/Source/CNTKv2LibraryDll/Function.cpp
+++ b/Source/CNTKv2LibraryDll/Function.cpp
@@ -116,6 +116,12 @@
             // The first two inputs are Constants for alpha and beta, followed by three Variables A, B and C.
             inputs = { m_inputs[0], m_inputs[1], m_inputs[3], m_inputs[2], m_inputs[4] };
         }
+        else if (pythonOperandOrder && primitiveFunction && (primitiveFunction->OpName() == L"GatherOp" || primitiveFunction->OpType() == PrimitiveOpType::Gather))
+        {
+            assert(m_inputs.size() == 2);
+            // For GatherOp, the Python operand order is reversed.
+            inputs = { m_inputs[1], m_inputs[0] };
+        }
         else
            inputs = m_inputs;
     }
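Note: the operand-order fix above is easiest to see from the Python API, where
gather takes (reference, indices) while the ONNX Gather op takes (data, indices)
with data first. A minimal export sketch mirroring the test in this patch (the
file name and data are illustrative only):

    import numpy as np
    import cntk as C

    ind = C.input_variable((2, 1))                                    # indices
    ref = C.constant(np.arange(12).reshape(6, 2).astype(np.float32))  # reference/data
    model = C.gather(ref, ind)                 # CNTK order: (reference, indices)
    model.save('gather.onnx', format=C.ModelFormat.ONNX)  # emits ONNX Gather(data, indices)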
@@ -2258,7 +2264,7 @@ namespace CNTK
             auto swapped = TransposeAxes(refPlaceholder, lastAxis, axis);
             auto gatherSwapped = GatherOp(indPlaceholder, swapped);
             auto result = TransposeAxes(gatherSwapped, lastAxis, axis);
-            return AsBlock(std::move(result), { { refPlaceholder, reference },{ indPlaceholder, indices } }, std::move(additionalProperties), L"GatherOp", name);
+            return AsBlock(std::move(result), { { indPlaceholder, indices }, { refPlaceholder, reference } }, std::move(additionalProperties), L"GatherOp", name);
         }
     }
@@ -2917,12 +2923,13 @@ namespace CNTK
     FunctionPtr PReLU(const Variable& alpha, const Variable& operand, const std::wstring& name)
     {
         auto operandPlaceholder = PlaceholderVariable();
+        auto alphaPlaceholder = PlaceholderVariable();
         auto lessThanZero = Less(operandPlaceholder, Constant::Scalar(operand.GetDataType(), 0.0));
         auto result = ElementSelect(lessThanZero,
-            ElementTimes(alpha, operandPlaceholder),
+            ElementTimes(alphaPlaceholder, operandPlaceholder),
             operandPlaceholder);

-        return AsBlock(std::move(result), { { operandPlaceholder, operand } }, L"PReLU", name);
+        return AsBlock(std::move(result), { { operandPlaceholder, operand }, { alphaPlaceholder, alpha } }, L"PReLU", name);
     }

     FunctionPtr Softplus(const Variable& operand, const std::wstring& name)
@@ -3749,5 +3756,89 @@ namespace CNTK
             std::move(attributes), L"Gemm", name);
     }
+
+    FunctionPtr Unsqueeze(const Variable& operand, const std::vector<Axis>& axes, const std::wstring& name)
+    {
+        int cntk_index;
+        int onnx_axis;
+
+        std::vector<size_t> axesIndices;
+        for (auto axis : axes)
+        {
+            // We need to express the axis in the ONNX axis system to help the ONNX conversion.
+            if (axis.IsStaticAxis())
+            {
+                if (axis.StaticAxisIndex() < 0)
+                {
+                    // Python shape [2,3,4,5], cntk_py_index = 1 (points at 3).
+                    // In Python, sanitize_axis applies Axis(-cntk_py_index - 1), so axis = -2.
+                    // In C++ the shape becomes [5,4,3,2]; axis(-2) still points to 3 (from the last).
+                    // With the ONNX Unsqueeze op the result shall be [2,3,4,5], thus onnx_axis = cntk_py_index = 1 (points to 3).
+                    // For CNTK reshape, cntk_index shall point to the one after 3 (the 2): cntk_index = axis + 1.
+                    // cntk_index (-1) needs to be converted to positive by the rank: cntk_index = 3.
+                    int cntk_py_index = -axis.StaticAxisIndex() - 1;
+                    onnx_axis = cntk_py_index;
+                    cntk_index = axis.StaticAxisIndex() + operand.Shape().Rank() + axes.size();
+                }
+                else
+                {
+                    // In this case the shape is the same as in Python: [2,3,4,5].
+                    // That is: cntk_py_index = 1, points to 3.
+                    // onnx_axis = 1, points to 3 in [2,3,4,5].
+                    // cntk_index = 1, points to 3 in [2,3,4,5].
+                    int cntk_py_index = axis.StaticAxisIndex();
+                    onnx_axis = cntk_py_index;
+                    cntk_index = cntk_py_index;
+                }
+            }
+            else if (axis.IsBatchAxis())
+            {
+                // Expected result: [[batch],[flattened sample]] ([[#][2,3,4,5]]).
+                // The current ONNX Unsqueeze op should not carry the batch axis in its attribute.
+                cntk_index = 0;
+            }
+            else
+            {
+                LogicError("Unsqueeze: accepts only static and batch axes.");
+            }
+
+            if (cntk_index < 0 || cntk_index > operand.Shape().Rank() + axes.size())
+            {
+                LogicError("Unsqueeze: unsupported axis (operand.Shape().Rank() = %zu, outShape.Rank() = %zu, axis = %s).",
+                           operand.Shape().Rank(), operand.Shape().Rank() + axes.size(), ToLegacyString(ToUTF8(axis.AsString())).c_str());
+            }
+
+            axesIndices.push_back(static_cast<size_t>(cntk_index));
+        }
+
+        std::vector<size_t> outShape(axesIndices.size() + operand.Shape().Rank(), 0);
+        for (int axis : axesIndices)
+        {
+            if (axis >= outShape.size())
+                LogicError("Unsqueeze: 'axes' has an out-of-range axis (%d >= %zu).", axis, outShape.size());
+            if (outShape[axis] != 0)
+                LogicError("Unsqueeze: 'axes' has a duplicate axis (%d).", axis);
+            outShape[axis] = 1;
+        }
+
+        auto begin = operand.Shape().Dimensions().cbegin();
+        for (auto &axisSize : outShape)
+        {
+            if (axisSize == 0)
+            {
+                axisSize = *begin++;
+            }
+        }
+        assert(begin == operand.Shape().Dimensions().cend());
+
+        Dictionary attributes = Dictionary();
+        attributes[PrimitiveFunction::AttributeNameAxisVec] = AsDictionaryValueVector(axes);
+
+        Variable operandPlaceholder = PlaceholderVariable(operand.Shape(), L"operandPlaceholder", {});
+
+        FunctionPtr result = Reshape(operandPlaceholder, outShape);
+
+        return AsBlock(std::move(result), { { operandPlaceholder, operand } }, std::move(attributes), L"Unsqueeze", name);
+    }
 }
 }
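Note: the index bookkeeping in Unsqueeze is dense. The standalone Python sketch
below (illustrative only; the helper name is made up) traces the arithmetic of
the negative-axis branch for a rank-3 operand with one unsqueeze axis:

    def unsqueeze_indices(static_axis_index, operand_rank, num_axes):
        # Invert Python's sanitize_axis mapping: axis = -cntk_py_index - 1.
        cntk_py_index = -static_axis_index - 1
        onnx_axis = cntk_py_index                 # ONNX counts from the front
        cntk_index = static_axis_index + operand_rank + num_axes
        return onnx_axis, cntk_index

    # axis = -2 (i.e. cntk_py_index = 1), operand rank 3, one unsqueeze axis:
    print(unsqueeze_indices(-2, 3, 1))  # -> (1, 2)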
diff --git a/Source/CNTKv2LibraryDll/Logger.h b/Source/CNTKv2LibraryDll/Logger.h
new file mode 100644
index 000000000..e1aa43157
--- /dev/null
+++ b/Source/CNTKv2LibraryDll/Logger.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <iostream>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+#include "core/common/logging/capture.h"
+#include "core/common/logging/isink.h"
+
+namespace CNTK {
+class CNTKClogSink : public Lotus::Logging::ISink {
+public:
+    CNTKClogSink()
+        : stream_{&(std::clog)}, flush_{true}
+    {}
+
+    void SendImpl(const Lotus::Logging::Timestamp &timestamp,
+                  const std::string &logger_id, const Lotus::Logging::Capture &message) override
+    {
+        UNUSED_PARAMETER(timestamp);
+
+        std::ostringstream msg;
+
+        msg << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", "
+            << message.Location().ToString() << "] " << message.Message();
+
+        (*stream_) << msg.str() << "\n";
+
+        if (flush_) {
+            stream_->flush();
+        }
+    }
+
+private:
+    std::ostream *stream_;
+    const bool flush_;
+};
+}  // namespace CNTK
\ No newline at end of file
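Note: with CNTKClogSink installed as the default sink, each Lotus record is
written to std::clog as " [<severity-prefix>:<category>:<logger-id>, <location>]
<message>". An illustrative line (field values are made up; the exact location
format is whatever Lotus' Location::ToString produces):

     [I:ONNX:Default, onnx.cc:42] model saved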
"Logger.h" #include "ONNX.h" #include "CNTKToONNX.h" #include "ONNXToCNTK.h" @@ -19,6 +21,32 @@ using namespace Microsoft::MSR::CNTK; namespace CNTK { + std::once_flag ONNXFormat::op_schema_initializer_flag_; + static std::string defaultLoggerId{"Default"}; + static Lotus::Logging::LoggingManager default_logging_manager_{ + std::unique_ptr{new CNTKClogSink{}}, + [](){ + Lotus::Logging::Severity severity; + switch (GetTraceLevel()) + { + case TraceLevel::Error: + severity = Lotus::Logging::Severity::kERROR; + break; + case TraceLevel::Warning: + severity = Lotus::Logging::Severity::kWARNING; + break; + case TraceLevel::Info: + severity = Lotus::Logging::Severity::kINFO; + break; + default: + severity = Lotus::Logging::Severity::kFATAL; + } + return severity; + }(), + false, + Lotus::Logging::LoggingManager::InstanceType::Default, + &defaultLoggerId }; + // MaxVersion number in ONNX 1.2 is 7. Change this number (e.g. to 1 or 5) // to experiment with earlier version ONNX. This is to help debugging with reshape op // (and some convolution ops which only passed with newer version) @@ -60,8 +88,29 @@ namespace CNTK } } +void ONNXFormat::InitializeLotusIR() +{ + // + // Initializing ONNX_NAMESPACE::Utils::DataTypeUtils::GetTypeStrToProtoMap() + // + // This is a static unordered_map variable that stores the mapping from type name(string) to TypeProto. + // If used without proper initialization, we risk poluting this static map: + // Whenever it sees a TypeProto with an unseen type name, it tries to store that TypeProto into the map. + // That TypeProto object might very likely contain TensorShapeProto, which describes the shape for that particular tensor. + // This shape will become the default for every TypeProto object created from that type name later on. + // And this leads to lots of unexpected errors such as shape inference failure. + // + // The solution is to initialize the map at the first run. 
diff --git a/Source/CNTKv2LibraryDll/proto/onnx/ONNXToCNTK.cpp b/Source/CNTKv2LibraryDll/proto/onnx/ONNXToCNTK.cpp
index 8739a1a56..3d03e15d3 100644
--- a/Source/CNTKv2LibraryDll/proto/onnx/ONNXToCNTK.cpp
+++ b/Source/CNTKv2LibraryDll/proto/onnx/ONNXToCNTK.cpp
@@ -2634,6 +2634,12 @@ FunctionPtr ONNXToCNTKHelper::CreateFunction(const Node *node, const std::vector<Variable> &inputs)
         FunctionPtr cntkFunction = Reshape(inputs[0], newShape, ToFixedWStringFromMultiByte(node->Name()));
         return cntkFunction;
     }
+    else if (onnxOpName == "Unsqueeze")
+    {
+        std::vector<Axis> axes = ConvertONNXAxesToCNTKCppApi(GetNamedAttributeAsInt64Vec(node, "axes"), inputs[0]);
+        FunctionPtr cntkFunction = ::CNTK::Internal::Unsqueeze(inputs[0], axes, ToFixedWStringFromMultiByte(node->Name()));
+        return cntkFunction;
+    }
     else if (onnxOpName == "Concat")
     {
         // We allow the 'axis' attribute to be optional, and not required (as
diff --git a/Source/CNTKv2LibraryDll/proto/onnx/Operators.cpp b/Source/CNTKv2LibraryDll/proto/onnx/Operators.cpp
index c6afb3dba..9f8a08e12 100644
--- a/Source/CNTKv2LibraryDll/proto/onnx/Operators.cpp
+++ b/Source/CNTKv2LibraryDll/proto/onnx/Operators.cpp
@@ -417,6 +417,9 @@ namespace ONNX
         { L"MatMul",{ {
             { L"MatMul", "MatMul" },
         } } },
+        { L"Unsqueeze",{ {
+            { L"Unsqueeze", "Unsqueeze" },
+        } } },
     };

     // given a cntkOpName and cntk attribute OpName which is saved in CNTK::Function's attribute,
@@ -486,7 +489,6 @@ namespace ONNX
         { L"ELU",{ 0, 1 } },
         { L"LeakyReLU",{ 0, 1 } },
         { L"SELU",{ 0, 1, 2 } },
-        { L"PReLU",{ 0 } },
         { L"ElementMax",{} },
         { L"ElementMin",{} },
         { L"HardSigmoid",{ 0, 1, 2, 3 } },
@@ -509,7 +511,7 @@ namespace ONNX
         { L"BatchNormalization",{ 0, 1, 2, 3, 4, -1 } },
         { L"Times",{ 1, 0 } },
         { L"Gather",{ 1, 0 } },
-        { L"PReLU",{ 1, 0 } },
+        { L"PReLU",{ -1, 0, 1 } },
         { L"Gemm", { -1, -1, 1, 0, 2} },
     };
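Note: with the exporter (CNTKToONNX.cpp) and importer (ONNXToCNTK.cpp) both
handling Unsqueeze, a saved model should round-trip through the ONNX path. A
sketch (the file name is illustrative; 'model' is any CNTK Function):

    import cntk as C

    model.save('model.onnx', format=C.ModelFormat.ONNX)
    loaded = C.Function.load('model.onnx', format=C.ModelFormat.ONNX)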
diff --git a/bindings/python/cntk/tests/onnx_op_test.py b/bindings/python/cntk/tests/onnx_op_test.py
index 613d9b53c..bfe461612 100644
--- a/bindings/python/cntk/tests/onnx_op_test.py
+++ b/bindings/python/cntk/tests/onnx_op_test.py
@@ -524,11 +524,11 @@ def test_Floor(tmpdir, dtype):
 #Gather
 @pytest.mark.parametrize("dtype", DType_Config)
 def test_Gather(tmpdir, dtype):
-    pytest.skip('Needs to be fixed after removal of batch axis change.')
     if (dtype == np.float16):
         pytest.skip("TO BE FIXED")
     with C.default_options(dtype = dtype):
-        c = np.asarray([[[0],[1]],[[4],[5]]]).astype(dtype)
+        c = np.asarray([[[0],[1]]]).astype(dtype)
+        #c = np.asarray([[[0],[1]],[[4],[5]]]).astype(dtype) # batch size = 2 not supported yet.
         x = C.input_variable((2,1))
         d = np.arange(12).reshape(6,2).astype(dtype)
         y = C.constant(d)
@@ -1052,11 +1052,26 @@ def test_Pad(tmpdir, dtype):
     verify_one_input(model, data, tmpdir, 'Pad_1')

 #PRelu
-#def test_PRelu(tmpdir):
-#    data = np.asarray([[-1, -0.5, 0, 1, 2]])
-#    alpha = C.constant(value=[[0.5, 0.5, 0.5, 0.5, 0.5]])
-#    model = C.param_relu(alpha, data)
-#    verify_no_input(model, tmpdir, 'PRelu_0')
+@pytest.mark.parametrize("dtype", DType_Config)
+def test_PRelu(tmpdir, dtype):
+    # no input
+    x_data = np.asarray([[-1, -0.5, 0, 1, 2]], dtype=dtype)
+    x = C.constant(value=x_data, dtype=dtype)
+    alpha_data = np.asarray([[0.5, 0.5, 0.5, 0.5, 0.5]], dtype=dtype)
+    alpha = C.constant(value=alpha_data, dtype=dtype)
+    model = C.param_relu(alpha, x)
+    verify_no_input(model, tmpdir, 'PRelu_0')
+
+    # one input
+    x = C.input_variable(x_data.shape, dtype=dtype)
+    model = C.param_relu(alpha, x)
+    verify_one_input(model, x_data, tmpdir, 'PRelu_1')
+
+    # two inputs
+    x = C.input_variable(x_data.shape, dtype=dtype)
+    alpha = C.input_variable(alpha_data.shape, dtype=dtype)
+    model = C.param_relu(alpha, x)
+    verify_two_input(model, alpha_data, x_data, tmpdir, 'PRelu_2')

 #Pow
 @pytest.mark.parametrize("dtype", DType_Config)
diff --git a/bindings/python/onnx_cntk/backend.py b/bindings/python/onnx_cntk/backend.py
index 00bd0e796..181713e78 100644
--- a/bindings/python/onnx_cntk/backend.py
+++ b/bindings/python/onnx_cntk/backend.py
@@ -61,7 +61,7 @@ class CNTKBackendRep(BackendRep):
         self.expected_out_types = expected_out_types

     def run(self, inputs, **kwargs):
-        input = {self.model.arguments[i]:inputs[i] for i in range(len(inputs))}
+        input = {self.model.arguments[i]:inputs[i] for i in range(len(inputs))}
         res = self.model.eval(input)
         # TODO: make this work for multiple output case.
         # TODO: support more types.
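Note: the onnx_cntk backend touched above follows the standard ONNX backend
API, mapping positional inputs to model.arguments as shown in run(). Typical
usage, assuming the module exposes the usual prepare() entry point (the model
path and input shape are illustrative):

    import numpy as np
    import onnx
    from onnx_cntk import backend

    onnx_model = onnx.load('model.onnx')
    rep = backend.prepare(onnx_model)
    outputs = rep.run([np.ones((2, 1), dtype=np.float32)])  # one array per model argument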