Printing top K values instead of just single max value (#189)
Currently, WinMLRunner prints only the top value in the result float tensor. This change outputs the top K values instead. Here is the sample output: Creating Session with GPU: AMD FirePro W4100 Graphics Adapter Binding (device = GPU, iteration = 1, inputBinding = CPU, inputDataType = RGB_Image, deviceCreationLocation = WinML)...[SUCCESS] Outputting top 5 values Feature Name: resnetv23_dense0_fwd index: 409, value: 6.74914 index: 769, value: 5.66233 index: 664, value: 5.62002 index: 585, value: 5.48044 index: 872, value: 5.29839 Evaluating (device = GPU, iteration = 1, inputBinding = CPU, inputDataType = RGB_Image, deviceCreationLocation = WinML)...[SUCCESS]
This commit is contained in:
Parent
42ee5b45de
Commit
430847ebff
|
@ -749,6 +749,12 @@ namespace WinMLRunnerTest
|
|||
Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
|
||||
}
|
||||
|
||||
TEST_METHOD(TestTopK)
|
||||
{
|
||||
const std::wstring command = BuildCommand({ EXE_PATH, L"-model", L"SqueezeNet.onnx", L"-TopK", L"5" });
|
||||
Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
|
||||
}
|
||||
|
||||
/* Commenting out test until WinMLRunnerDLL.dll is properly written and ABI friendly
|
||||
TEST_METHOD(TestWinMLRunnerDllLinking)
|
||||
{
|
||||
|
|
|
@ -45,6 +45,7 @@ Required command-Line arguments:
|
|||
-Perf [all]: capture performance measurements such as timing and memory usage. Specifying "all" will output all measurements
|
||||
-Iterations : # times perf measurements will be run/averaged
|
||||
-Input <fully qualified path>: binds image or CSV to model
|
||||
-TopK <number>: print top <number> values in the result. Default to 1
|
||||
-PerfOutput [<fully qualified path>]: csv file to write the perf results to
|
||||
-SavePerIterationPerf : save per iteration performance results to csv file
|
||||
-SaveTensorData <saveMode folderPath>: saveMode: save first iteration or all iteration output tensor results to csv file [First, All]
|
||||
|
|
|
@ -13,35 +13,119 @@ using namespace winrt::Windows::Graphics::DirectX;
|
|||
using namespace winrt::Windows::Graphics::Imaging;
|
||||
using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
|
||||
|
||||
template <TensorKind T> struct TensorKindToType { static_assert(true, "No TensorKind mapped for given type!"); };
|
||||
template <> struct TensorKindToType<TensorKind::UInt8> { typedef uint8_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Int8> { typedef uint8_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::UInt16> { typedef uint16_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Int16> { typedef int16_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::UInt32> { typedef uint32_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Int32> { typedef int32_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::UInt64> { typedef uint64_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Int64> { typedef int64_t Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Boolean> { typedef boolean Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Double> { typedef double Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Float> { typedef float Type; };
|
||||
template <> struct TensorKindToType<TensorKind::Float16> { typedef float Type; };
|
||||
template <> struct TensorKindToType<TensorKind::String> { typedef winrt::hstring Type; };
|
||||
template <TensorKind T> struct TensorKindToType
|
||||
{
|
||||
static_assert(true, "No TensorKind mapped for given type!");
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::UInt8>
|
||||
{
|
||||
typedef uint8_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Int8>
|
||||
{
|
||||
typedef uint8_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::UInt16>
|
||||
{
|
||||
typedef uint16_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Int16>
|
||||
{
|
||||
typedef int16_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::UInt32>
|
||||
{
|
||||
typedef uint32_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Int32>
|
||||
{
|
||||
typedef int32_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::UInt64>
|
||||
{
|
||||
typedef uint64_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Int64>
|
||||
{
|
||||
typedef int64_t Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Boolean>
|
||||
{
|
||||
typedef boolean Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Double>
|
||||
{
|
||||
typedef double Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Float>
|
||||
{
|
||||
typedef float Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::Float16>
|
||||
{
|
||||
typedef float Type;
|
||||
};
|
||||
template <> struct TensorKindToType<TensorKind::String>
|
||||
{
|
||||
typedef winrt::hstring Type;
|
||||
};
|
||||
|
||||
template <TensorKind T> struct TensorKindToValue { static_assert(true, "No TensorKind mapped for given type!"); };
|
||||
template <> struct TensorKindToValue<TensorKind::UInt8> { typedef TensorUInt8Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Int8> { typedef TensorInt8Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::UInt16> { typedef TensorUInt16Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Int16> { typedef TensorInt16Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::UInt32> { typedef TensorUInt32Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Int32> { typedef TensorInt32Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::UInt64> { typedef TensorUInt64Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Int64> { typedef TensorInt64Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Boolean> { typedef TensorBoolean Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Double> { typedef TensorDouble Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Float> { typedef TensorFloat Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::Float16> { typedef TensorFloat16Bit Type; };
|
||||
template <> struct TensorKindToValue<TensorKind::String> { typedef TensorString Type; };
|
||||
template <TensorKind T> struct TensorKindToValue
|
||||
{
|
||||
static_assert(true, "No TensorKind mapped for given type!");
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::UInt8>
|
||||
{
|
||||
typedef TensorUInt8Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Int8>
|
||||
{
|
||||
typedef TensorInt8Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::UInt16>
|
||||
{
|
||||
typedef TensorUInt16Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Int16>
|
||||
{
|
||||
typedef TensorInt16Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::UInt32>
|
||||
{
|
||||
typedef TensorUInt32Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Int32>
|
||||
{
|
||||
typedef TensorInt32Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::UInt64>
|
||||
{
|
||||
typedef TensorUInt64Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Int64>
|
||||
{
|
||||
typedef TensorInt64Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Boolean>
|
||||
{
|
||||
typedef TensorBoolean Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Double>
|
||||
{
|
||||
typedef TensorDouble Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Float>
|
||||
{
|
||||
typedef TensorFloat Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::Float16>
|
||||
{
|
||||
typedef TensorFloat16Bit Type;
|
||||
};
|
||||
template <> struct TensorKindToValue<TensorKind::String>
|
||||
{
|
||||
typedef TensorString Type;
|
||||
};
|
||||
|
||||
namespace BindingUtilities
|
||||
{
|
||||
|
@ -235,11 +319,12 @@ namespace BindingUtilities
|
|||
for (UINT dim = 0; dim < tensorDescriptorShape.Size(); dim++)
|
||||
{
|
||||
INT64 dimSize = tensorDescriptorShape.GetAt(dim);
|
||||
if (dimSize > 0) //If the dimension is greater than 0, then it is known.
|
||||
if (dimSize > 0) // If the dimension is greater than 0, then it is known.
|
||||
{
|
||||
vecShape.push_back(dimSize);
|
||||
}
|
||||
else //otherwise, make sure that the dimension is -1, representing free dimension. If not, then it's an invalid model.
|
||||
else // otherwise, make sure that the dimension is -1, representing free dimension. If not, then it's an
|
||||
// invalid model.
|
||||
{
|
||||
if (dimSize == -1)
|
||||
{
|
||||
|
@ -247,7 +332,8 @@ namespace BindingUtilities
|
|||
}
|
||||
else
|
||||
{
|
||||
throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " + dimSize);
|
||||
throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " +
|
||||
dimSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -258,7 +344,8 @@ namespace BindingUtilities
|
|||
|
||||
BYTE* actualData;
|
||||
uint32_t actualSizeInBytes;
|
||||
spTensorValueNative->GetBuffer(&actualData, &actualSizeInBytes); //Need to GetBuffer to have CPU memory backing tensorValue
|
||||
spTensorValueNative->GetBuffer(
|
||||
&actualData, &actualSizeInBytes); // Need to GetBuffer to have CPU memory backing tensorValue
|
||||
return tensorValue;
|
||||
}
|
||||
else
|
||||
|
@ -419,8 +506,8 @@ namespace BindingUtilities
|
|||
com_ptr<ITensorNative> itn = results.Lookup(desc.Name()).as<ITensorNative>();
|
||||
HRESULT(itn->GetBuffer(reinterpret_cast<BYTE**>(&tensor), &uCapacity));
|
||||
int size = 0;
|
||||
float maxValue = 0;
|
||||
int maxIndex = 0;
|
||||
unsigned int topK = args.TopK();
|
||||
std::vector<std::pair<float, int>> maxKValues;
|
||||
std::ofstream fout;
|
||||
if (args.IsSaveTensor())
|
||||
{
|
||||
|
@ -445,12 +532,12 @@ namespace BindingUtilities
|
|||
break;
|
||||
case TensorKind::Float16:
|
||||
{
|
||||
output.ProcessTensorResult<HALF>(args, tensor, uCapacity, maxValue, maxIndex, fout);
|
||||
output.ProcessTensorResult<HALF>(args, tensor, uCapacity, maxKValues, fout, topK);
|
||||
}
|
||||
break;
|
||||
case TensorKind::Float:
|
||||
{
|
||||
output.ProcessTensorResult<float>(args, tensor, uCapacity, maxValue, maxIndex, fout);
|
||||
output.ProcessTensorResult<float>(args, tensor, uCapacity, maxKValues, fout, topK);
|
||||
}
|
||||
break;
|
||||
case TensorKind::Int64:
|
||||
|
@ -472,16 +559,27 @@ namespace BindingUtilities
|
|||
if (args.IsSaveTensor())
|
||||
{
|
||||
fout.close();
|
||||
std::string iterationResult =
|
||||
"Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue);
|
||||
output.SaveResult(iterationNum, iterationResult, static_cast<int>(hash_data(tensor, uCapacity)));
|
||||
for (auto& pair : maxKValues)
|
||||
{
|
||||
auto maxValue = pair.first;
|
||||
auto maxIndex = pair.second;
|
||||
std::string iterationResult =
|
||||
"Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue);
|
||||
output.SaveResult(iterationNum, iterationResult,
|
||||
static_cast<int>(hash_data(tensor, uCapacity)));
|
||||
}
|
||||
}
|
||||
if (!args.IsGarbageInput() && iterationNum == 0)
|
||||
{
|
||||
std::cout << "Outputting results.. " << std::endl;
|
||||
std::cout << "Outputting top " << args.TopK() << " values" << std::endl;
|
||||
std::cout << "Feature Name: " << name << std::endl;
|
||||
std::wcout << " resultVector[" << maxIndex << "] has the maximal value of " << maxValue
|
||||
<< std::endl;
|
||||
for (auto& pair : maxKValues)
|
||||
{
|
||||
auto maxValue = pair.first;
|
||||
auto maxIndex = pair.second;
|
||||
std::wcout << " index: " << maxIndex << ", value: " << maxValue
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (desc.Kind() == LearningModelFeatureKind::Sequence)
|
||||
|
|
|
@ -31,6 +31,7 @@ void CommandLineArgs::PrintUsage()
|
|||
<< std::endl;
|
||||
std::cout << " -Iterations : # times perf measurements will be run/averaged" << std::endl;
|
||||
std::cout << " -Input <fully qualified path>: binds image or CSV to model" << std::endl;
|
||||
std::cout << " -TopK <number>: print top <number> values in the result. Default to 1" << std::endl;
|
||||
std::cout << " -PerfOutput [<fully qualified path>]: csv file to write the perf results to" << std::endl;
|
||||
std::cout << " -SavePerIterationPerf : save per iteration performance results to csv file" << std::endl;
|
||||
std::cout << " -SaveTensorData <saveMode folderPath>: saveMode: save first iteration or all iteration output "
|
||||
|
@ -278,6 +279,11 @@ CommandLineArgs::CommandLineArgs(const std::vector<std::wstring>& args)
|
|||
unsigned thread_interval = std::stoi(args[++i].c_str());
|
||||
SetThreadInterval(thread_interval);
|
||||
}
|
||||
else if ((_wcsicmp(args[i].c_str(), L"-TopK") == 0))
|
||||
{
|
||||
CheckNextArgument(args, i);
|
||||
SetTopK(std::stoi(args[++i].c_str()));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::wstring msg = L"Unknown option ";
|
||||
|
|
|
@ -72,6 +72,7 @@ public:
|
|||
uint32_t NumIterations() const { return m_numIterations; }
|
||||
uint32_t NumThreads() const { return m_numThreads; }
|
||||
uint32_t ThreadInterval() const { return m_threadInterval; } // Thread interval in milliseconds
|
||||
uint32_t TopK() const { return m_topK; }
|
||||
|
||||
void ToggleCPU(bool useCPU) { m_useCPU = useCPU; }
|
||||
void ToggleGPU(bool useGPU) { m_useGPU = useGPU; }
|
||||
|
@ -96,6 +97,7 @@ public:
|
|||
void SetInputDataPath(const std::wstring& inputDataPath) { m_inputData = inputDataPath; }
|
||||
void SetNumThreads(unsigned numThreads) { m_numThreads = numThreads; }
|
||||
void SetThreadInterval(unsigned threadInterval) { m_threadInterval = threadInterval; }
|
||||
void SetTopK(unsigned k) { m_topK = k; }
|
||||
void SetPerformanceCSVPath(const std::wstring& performanceCSVPath)
|
||||
{
|
||||
m_perfOutputPath = performanceCSVPath;
|
||||
|
@ -140,6 +142,7 @@ private:
|
|||
uint32_t m_numIterations = 1;
|
||||
uint32_t m_numThreads = 1;
|
||||
uint32_t m_threadInterval = 0;
|
||||
uint32_t m_topK = 1;
|
||||
|
||||
void CheckNextArgument(const std::vector<std::wstring>& args, UINT i);
|
||||
void CheckForInvalidArguments();
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <dxgi.h>
|
||||
#include <Windows.Graphics.DirectX.Direct3D11.interop.h>
|
||||
#include <direct.h>
|
||||
#include <queue>
|
||||
|
||||
using namespace winrt::Windows::AI::MachineLearning;
|
||||
using namespace winrt::Windows::Storage::Streams;
|
||||
|
@ -699,20 +700,17 @@ public:
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
void ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity, float& maxValue,
|
||||
int& maxIndex, std::ofstream& fout)
|
||||
void ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity,
|
||||
std::vector<std::pair<float,int>>& maxValues, std::ofstream& fout,
|
||||
unsigned int k)
|
||||
{
|
||||
// Create a priority queue of size k that pops the lowest value first
|
||||
// We will remove lowest values as we iterate over all the values
|
||||
auto cmp = [](std::pair<float, int> x, std::pair<float, int> y) { return x.first > y.first; };
|
||||
std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>, decltype(cmp)> topKvalues(cmp);
|
||||
|
||||
T* tensor = (T*)buffer;
|
||||
int size = uCapacity / sizeof(T);
|
||||
if (!std::is_same<T, HALF>::value)
|
||||
{
|
||||
maxValue = *tensor;
|
||||
}
|
||||
else
|
||||
{
|
||||
maxValue = XMConvertHalfToFloat(static_cast<HALF>(*tensor));
|
||||
}
|
||||
maxIndex = 0;
|
||||
for (int i = 0; i < size; i++)
|
||||
{
|
||||
float val = 0;
|
||||
|
@ -728,12 +726,29 @@ public:
|
|||
{
|
||||
fout << i << "," << val << std::endl;
|
||||
}
|
||||
if (maxValue < val)
|
||||
|
||||
if (topKvalues.size() < k)
|
||||
{
|
||||
maxValue = val;
|
||||
maxIndex = i;
|
||||
topKvalues.push({ val, i });
|
||||
}
|
||||
else if (k > 0)
|
||||
{
|
||||
auto maxValue = topKvalues.top().first;
|
||||
if (maxValue < val)
|
||||
{
|
||||
topKvalues.pop();
|
||||
topKvalues.push({ val, i });
|
||||
}
|
||||
}
|
||||
}
|
||||
while (!topKvalues.empty())
|
||||
{
|
||||
auto pair = topKvalues.top();
|
||||
maxValues.push_back(pair);
|
||||
topKvalues.pop();
|
||||
}
|
||||
// Put vector in order of highest value to lowest
|
||||
std::reverse(maxValues.begin(), maxValues.end());
|
||||
}
|
||||
|
||||
void WritePerformanceDataToCSV(const Profiler<WINML_MODEL_TEST_PERF>& profiler, int numIterations,
|
||||
|
|
Loading…
Reference in new issue