diff --git a/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp b/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp index ee40e892..fda0f037 100644 --- a/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp +++ b/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp @@ -749,6 +749,12 @@ namespace WinMLRunnerTest Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); } + TEST_METHOD(TestTopK) + { + const std::wstring command = BuildCommand({ EXE_PATH, L"-model", L"SqueezeNet.onnx", L"-TopK", L"5" }); + Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str())); + } + /* Commenting out test until WinMLRunnerDLL.dll is properly written and ABI friendly TEST_METHOD(TestWinMLRunnerDllLinking) { diff --git a/Tools/WinMLRunner/README.md b/Tools/WinMLRunner/README.md index 6a38138e..f8fb9ee1 100644 --- a/Tools/WinMLRunner/README.md +++ b/Tools/WinMLRunner/README.md @@ -45,6 +45,7 @@ Required command-Line arguments: -Perf [all]: capture performance measurements such as timing and memory usage. Specifying "all" will output all measurements -Iterations : # times perf measurements will be run/averaged -Input : binds image or CSV to model +-TopK : print top values in the result. Default to 1 -PerfOutput []: csv file to write the perf results to -SavePerIterationPerf : save per iteration performance results to csv file -SaveTensorData : saveMode: save first iteration or all iteration output tensor results to csv file [First, All] diff --git a/Tools/WinMLRunner/src/BindingUtilities.h b/Tools/WinMLRunner/src/BindingUtilities.h index 2cc7a8fa..327db4f9 100644 --- a/Tools/WinMLRunner/src/BindingUtilities.h +++ b/Tools/WinMLRunner/src/BindingUtilities.h @@ -13,35 +13,119 @@ using namespace winrt::Windows::Graphics::DirectX; using namespace winrt::Windows::Graphics::Imaging; using namespace winrt::Windows::Graphics::DirectX::Direct3D11; -template struct TensorKindToType { static_assert(true, "No TensorKind mapped for given type!"); }; -template <> struct TensorKindToType { typedef uint8_t Type; }; -template <> struct TensorKindToType { typedef uint8_t Type; }; -template <> struct TensorKindToType { typedef uint16_t Type; }; -template <> struct TensorKindToType { typedef int16_t Type; }; -template <> struct TensorKindToType { typedef uint32_t Type; }; -template <> struct TensorKindToType { typedef int32_t Type; }; -template <> struct TensorKindToType { typedef uint64_t Type; }; -template <> struct TensorKindToType { typedef int64_t Type; }; -template <> struct TensorKindToType { typedef boolean Type; }; -template <> struct TensorKindToType { typedef double Type; }; -template <> struct TensorKindToType { typedef float Type; }; -template <> struct TensorKindToType { typedef float Type; }; -template <> struct TensorKindToType { typedef winrt::hstring Type; }; +template struct TensorKindToType +{ + static_assert(true, "No TensorKind mapped for given type!"); +}; +template <> struct TensorKindToType +{ + typedef uint8_t Type; +}; +template <> struct TensorKindToType +{ + typedef uint8_t Type; +}; +template <> struct TensorKindToType +{ + typedef uint16_t Type; +}; +template <> struct TensorKindToType +{ + typedef int16_t Type; +}; +template <> struct TensorKindToType +{ + typedef uint32_t Type; +}; +template <> struct TensorKindToType +{ + typedef int32_t Type; +}; +template <> struct TensorKindToType +{ + typedef uint64_t Type; +}; +template <> struct TensorKindToType +{ + typedef int64_t Type; +}; +template <> struct TensorKindToType +{ + typedef boolean Type; +}; +template <> struct TensorKindToType +{ + typedef double Type; +}; +template <> struct TensorKindToType +{ + typedef float Type; +}; +template <> struct TensorKindToType +{ + typedef float Type; +}; +template <> struct TensorKindToType +{ + typedef winrt::hstring Type; +}; -template struct TensorKindToValue { static_assert(true, "No TensorKind mapped for given type!"); }; -template <> struct TensorKindToValue { typedef TensorUInt8Bit Type; }; -template <> struct TensorKindToValue { typedef TensorInt8Bit Type; }; -template <> struct TensorKindToValue { typedef TensorUInt16Bit Type; }; -template <> struct TensorKindToValue { typedef TensorInt16Bit Type; }; -template <> struct TensorKindToValue { typedef TensorUInt32Bit Type; }; -template <> struct TensorKindToValue { typedef TensorInt32Bit Type; }; -template <> struct TensorKindToValue { typedef TensorUInt64Bit Type; }; -template <> struct TensorKindToValue { typedef TensorInt64Bit Type; }; -template <> struct TensorKindToValue { typedef TensorBoolean Type; }; -template <> struct TensorKindToValue { typedef TensorDouble Type; }; -template <> struct TensorKindToValue { typedef TensorFloat Type; }; -template <> struct TensorKindToValue { typedef TensorFloat16Bit Type; }; -template <> struct TensorKindToValue { typedef TensorString Type; }; +template struct TensorKindToValue +{ + static_assert(true, "No TensorKind mapped for given type!"); +}; +template <> struct TensorKindToValue +{ + typedef TensorUInt8Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt8Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorUInt16Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt16Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorUInt32Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt32Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorUInt64Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorInt64Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorBoolean Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorDouble Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorFloat Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorFloat16Bit Type; +}; +template <> struct TensorKindToValue +{ + typedef TensorString Type; +}; namespace BindingUtilities { @@ -235,11 +319,12 @@ namespace BindingUtilities for (UINT dim = 0; dim < tensorDescriptorShape.Size(); dim++) { INT64 dimSize = tensorDescriptorShape.GetAt(dim); - if (dimSize > 0) //If the dimension is greater than 0, then it is known. + if (dimSize > 0) // If the dimension is greater than 0, then it is known. { vecShape.push_back(dimSize); } - else //otherwise, make sure that the dimension is -1, representing free dimension. If not, then it's an invalid model. + else // otherwise, make sure that the dimension is -1, representing free dimension. If not, then it's an + // invalid model. { if (dimSize == -1) { @@ -247,7 +332,8 @@ namespace BindingUtilities } else { - throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " + dimSize); + throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " + + dimSize); } } } @@ -258,7 +344,8 @@ namespace BindingUtilities BYTE* actualData; uint32_t actualSizeInBytes; - spTensorValueNative->GetBuffer(&actualData, &actualSizeInBytes); //Need to GetBuffer to have CPU memory backing tensorValue + spTensorValueNative->GetBuffer( + &actualData, &actualSizeInBytes); // Need to GetBuffer to have CPU memory backing tensorValue return tensorValue; } else @@ -419,8 +506,8 @@ namespace BindingUtilities com_ptr itn = results.Lookup(desc.Name()).as(); HRESULT(itn->GetBuffer(reinterpret_cast(&tensor), &uCapacity)); int size = 0; - float maxValue = 0; - int maxIndex = 0; + unsigned int topK = args.TopK(); + std::vector> maxKValues; std::ofstream fout; if (args.IsSaveTensor()) { @@ -445,12 +532,12 @@ namespace BindingUtilities break; case TensorKind::Float16: { - output.ProcessTensorResult(args, tensor, uCapacity, maxValue, maxIndex, fout); + output.ProcessTensorResult(args, tensor, uCapacity, maxKValues, fout, topK); } break; case TensorKind::Float: { - output.ProcessTensorResult(args, tensor, uCapacity, maxValue, maxIndex, fout); + output.ProcessTensorResult(args, tensor, uCapacity, maxKValues, fout, topK); } break; case TensorKind::Int64: @@ -472,16 +559,27 @@ namespace BindingUtilities if (args.IsSaveTensor()) { fout.close(); - std::string iterationResult = - "Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue); - output.SaveResult(iterationNum, iterationResult, static_cast(hash_data(tensor, uCapacity))); + for (auto& pair : maxKValues) + { + auto maxValue = pair.first; + auto maxIndex = pair.second; + std::string iterationResult = + "Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue); + output.SaveResult(iterationNum, iterationResult, + static_cast(hash_data(tensor, uCapacity))); + } } if (!args.IsGarbageInput() && iterationNum == 0) { - std::cout << "Outputting results.. " << std::endl; + std::cout << "Outputting top " << args.TopK() << " values" << std::endl; std::cout << "Feature Name: " << name << std::endl; - std::wcout << " resultVector[" << maxIndex << "] has the maximal value of " << maxValue - << std::endl; + for (auto& pair : maxKValues) + { + auto maxValue = pair.first; + auto maxIndex = pair.second; + std::wcout << " index: " << maxIndex << ", value: " << maxValue + << std::endl; + } } } else if (desc.Kind() == LearningModelFeatureKind::Sequence) diff --git a/Tools/WinMLRunner/src/CommandLineArgs.cpp b/Tools/WinMLRunner/src/CommandLineArgs.cpp index 765c316f..9adf0567 100644 --- a/Tools/WinMLRunner/src/CommandLineArgs.cpp +++ b/Tools/WinMLRunner/src/CommandLineArgs.cpp @@ -31,6 +31,7 @@ void CommandLineArgs::PrintUsage() << std::endl; std::cout << " -Iterations : # times perf measurements will be run/averaged" << std::endl; std::cout << " -Input : binds image or CSV to model" << std::endl; + std::cout << " -TopK : print top values in the result. Default to 1" << std::endl; std::cout << " -PerfOutput []: csv file to write the perf results to" << std::endl; std::cout << " -SavePerIterationPerf : save per iteration performance results to csv file" << std::endl; std::cout << " -SaveTensorData : saveMode: save first iteration or all iteration output " @@ -278,6 +279,11 @@ CommandLineArgs::CommandLineArgs(const std::vector& args) unsigned thread_interval = std::stoi(args[++i].c_str()); SetThreadInterval(thread_interval); } + else if ((_wcsicmp(args[i].c_str(), L"-TopK") == 0)) + { + CheckNextArgument(args, i); + SetTopK(std::stoi(args[++i].c_str())); + } else { std::wstring msg = L"Unknown option "; diff --git a/Tools/WinMLRunner/src/CommandLineArgs.h b/Tools/WinMLRunner/src/CommandLineArgs.h index c41396c0..18afd558 100644 --- a/Tools/WinMLRunner/src/CommandLineArgs.h +++ b/Tools/WinMLRunner/src/CommandLineArgs.h @@ -72,6 +72,7 @@ public: uint32_t NumIterations() const { return m_numIterations; } uint32_t NumThreads() const { return m_numThreads; } uint32_t ThreadInterval() const { return m_threadInterval; } // Thread interval in milliseconds + uint32_t TopK() const { return m_topK; } void ToggleCPU(bool useCPU) { m_useCPU = useCPU; } void ToggleGPU(bool useGPU) { m_useGPU = useGPU; } @@ -96,6 +97,7 @@ public: void SetInputDataPath(const std::wstring& inputDataPath) { m_inputData = inputDataPath; } void SetNumThreads(unsigned numThreads) { m_numThreads = numThreads; } void SetThreadInterval(unsigned threadInterval) { m_threadInterval = threadInterval; } + void SetTopK(unsigned k) { m_topK = k; } void SetPerformanceCSVPath(const std::wstring& performanceCSVPath) { m_perfOutputPath = performanceCSVPath; @@ -140,6 +142,7 @@ private: uint32_t m_numIterations = 1; uint32_t m_numThreads = 1; uint32_t m_threadInterval = 0; + uint32_t m_topK = 1; void CheckNextArgument(const std::vector& args, UINT i); void CheckForInvalidArguments(); diff --git a/Tools/WinMLRunner/src/OutputHelper.h b/Tools/WinMLRunner/src/OutputHelper.h index eb71b8eb..4c17a2e6 100644 --- a/Tools/WinMLRunner/src/OutputHelper.h +++ b/Tools/WinMLRunner/src/OutputHelper.h @@ -10,6 +10,7 @@ #include #include #include +#include using namespace winrt::Windows::AI::MachineLearning; using namespace winrt::Windows::Storage::Streams; @@ -699,20 +700,17 @@ public: } template - void ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity, float& maxValue, - int& maxIndex, std::ofstream& fout) + void ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity, + std::vector>& maxValues, std::ofstream& fout, + unsigned int k) { + // Create a priority queue of size k that pops the lowest value first + // We will remove lowest values as we iterate over all the values + auto cmp = [](std::pair x, std::pair y) { return x.first > y.first; }; + std::priority_queue, std::vector>, decltype(cmp)> topKvalues(cmp); + T* tensor = (T*)buffer; int size = uCapacity / sizeof(T); - if (!std::is_same::value) - { - maxValue = *tensor; - } - else - { - maxValue = XMConvertHalfToFloat(static_cast(*tensor)); - } - maxIndex = 0; for (int i = 0; i < size; i++) { float val = 0; @@ -728,12 +726,29 @@ public: { fout << i << "," << val << std::endl; } - if (maxValue < val) + + if (topKvalues.size() < k) { - maxValue = val; - maxIndex = i; + topKvalues.push({ val, i }); + } + else if (k > 0) + { + auto maxValue = topKvalues.top().first; + if (maxValue < val) + { + topKvalues.pop(); + topKvalues.push({ val, i }); + } } } + while (!topKvalues.empty()) + { + auto pair = topKvalues.top(); + maxValues.push_back(pair); + topKvalues.pop(); + } + // Put vector in order of highest value to lowest + std::reverse(maxValues.begin(), maxValues.end()); } void WritePerformanceDataToCSV(const Profiler& profiler, int numIterations,