WinMLModelRunner sample
This commit is contained in:

Parent: 1ef6ae729d
Commit: 9fd8281724
@@ -0,0 +1,109 @@ BindingUtilities.h
#pragma once
#include "Common.h"
#include "ModelBinding.h"

using namespace winrt::Windows::AI::MachineLearning;

namespace BindingUtilities
{
    // Creates a tensor of uninitialized ("garbage") data for every input feature
    // of the model and binds it to the given binding context.
    void BindTensorsFromGarbageData(LearningModelBinding context, LearningModel model) {
        for (auto&& description : model.InputFeatures())
        {
            if (description == nullptr)
            {
                ThrowFailure(L"Learning model has no binding description.");
            }

            hstring name = description.Name();
            TensorFeatureDescriptor tensorDescriptor = description.as<TensorFeatureDescriptor>();
            TensorKind tensorKind = tensorDescriptor.TensorKind();

            switch (tensorKind) {
                case TensorKind::Undefined:
                {
                    ThrowFailure(L"TensorKind is undefined.");
                }
                case TensorKind::Float:
                {
                    ModelBinding<float> binding(description);
                    ITensor tensor = TensorFloat::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::Double:
                {
                    ModelBinding<double> binding(description);
                    ITensor tensor = TensorDouble::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::Int8:
                {
                    ModelBinding<uint8_t> binding(description);
                    ITensor tensor = TensorInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::UInt8:
                {
                    ModelBinding<uint8_t> binding(description);
                    ITensor tensor = TensorUInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::Int16:
                {
                    ModelBinding<int16_t> binding(description);
                    ITensor tensor = TensorInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::UInt16:
                {
                    ModelBinding<uint16_t> binding(description);
                    ITensor tensor = TensorUInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::Int32:
                {
                    ModelBinding<int32_t> binding(description);
                    ITensor tensor = TensorInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::UInt32:
                {
                    ModelBinding<uint32_t> binding(description);
                    ITensor tensor = TensorUInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::Int64:
                {
                    ModelBinding<int64_t> binding(description);
                    ITensor tensor = TensorInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                case TensorKind::UInt64:
                {
                    ModelBinding<uint64_t> binding(description);
                    ITensor tensor = TensorUInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
                    context.Bind(name, tensor);
                }
                break;
                default:
                {
                    ThrowFailure(L"TensorKind has not been implemented.");
                    break;
                }
            }
        }
    }

    // Clears any previous bindings and rebinds garbage input data for every model input.
    void BindGarbageDataToContext(LearningModelBinding context, LearningModel model) {
        context.Clear();
        BindTensorsFromGarbageData(context, model);
    }
};
@@ -0,0 +1,42 @@ CommandLineArgs.cpp
#include <Windows.h>
#include <string>
#include "CommandLineArgs.h"

CommandLineArgs::CommandLineArgs()
{
    int numArgs = 0;
    LPWSTR* args = CommandLineToArgvW(GetCommandLineW(), &numArgs);
    if (args == nullptr)
    {
        return;
    }

    for (int i = 0; i < numArgs; i++)
    {
        if (_wcsicmp(args[i], L"-CPU") == 0)
        {
            m_useCPU = true;
        }
        else if (_wcsicmp(args[i], L"-GPU") == 0)
        {
            m_useGPU = true;
        }
        else if ((_wcsicmp(args[i], L"-iterations") == 0) && (i + 1 < numArgs))
        {
            m_numIterations = static_cast<UINT>(_wtoi(args[++i]));
        }
        else if ((_wcsicmp(args[i], L"-model") == 0) && (i + 1 < numArgs))
        {
            m_modelPath = args[++i];
        }
        else if ((_wcsicmp(args[i], L"-folder") == 0) && (i + 1 < numArgs))
        {
            m_folderPath = args[++i];
        }
        else if (_wcsicmp(args[i], L"-disableMetacommands") == 0)
        {
            m_metacommandsEnabled = false;
        }
        else if ((_wcsicmp(args[i], L"-csv") == 0) && (i + 1 < numArgs))
        {
            m_csvFileName = args[++i];
        }
    }

    LocalFree(args);

    // Run on both devices when the user specifies both -CPU and -GPU, or neither.
    m_useCPUandGPU = m_useCPU == m_useGPU;
}
@@ -0,0 +1,28 @@ CommandLineArgs.h
#pragma once
#include <string>

class CommandLineArgs
{
public:
    CommandLineArgs();

    bool UseCPU() const { return m_useCPU; }
    bool UseGPU() const { return m_useGPU; }
    bool UseCPUandGPU() const { return m_useCPUandGPU; }

    const std::wstring& ModelPath() const { return m_modelPath; }
    void SetModelPath(std::wstring path) { m_modelPath = path; }
    const std::wstring& FolderPath() const { return m_folderPath; }
    UINT NumIterations() const { return m_numIterations; }
    std::wstring CsvFileName() const { return m_csvFileName; }
    bool MetacommandsEnabled() const { return m_metacommandsEnabled; }

private:
    bool m_useCPU = false;
    bool m_useGPU = false;
    bool m_useCPUandGPU = false;
    std::wstring m_folderPath;
    std::wstring m_modelPath;
    std::wstring m_csvFileName;
    UINT m_numIterations = 1;
    bool m_metacommandsEnabled = true; // metacommands stay on unless -disableMetacommands is passed
};
@@ -0,0 +1,91 @@ Common.h
#pragma once

#define _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS
#include <winrt/Windows.AI.MachineLearning.h>
#include <winrt/Windows.Foundation.h>
#include <winrt/Windows.Media.h>
#include <winrt/Windows.Graphics.Imaging.h>
#include <winrt/Windows.Storage.h>
#include <winrt/Windows.Storage.Streams.h>
#include "TimerHelper.h"
#include <vector>
#include <string>
#include <iostream>
#include <atlbase.h>
#include <sstream>
#include <comdef.h>
#include <algorithm>
#include <numeric>
#include <cassert>
#include <fstream>
#include <future>
#include <thread>
#include <dxgi1_6.h>

enum WINML_MODEL_TEST_PERF
{
    ENTIRE_TEST = 0,
    LOAD_MODEL,
    CREATE_SESSION,
    BIND_VALUE,
    EVAL_MODEL,
    EVAL_MODEL_FIRST_RUN,
    COUNT
};

static std::vector<std::wstring> WINML_MODEL_TEST_PERF_NAMES =
{
    L"ENTIRE TEST ",
    L" LOAD MODEL ",
    L" CREATE SESSION ",
    L" BIND VALUE ",
    L" EVAL MODEL ",
    L" EVAL MODEL FIRST RUN ",
};

#define MAX_PROFILING_LOOP 100

using namespace winrt;

inline std::wstring MakeErrorMsg(HRESULT hr)
{
    std::wostringstream ss;
    ss << L"0x" << std::hex << hr << ": " << _com_error(hr).ErrorMessage();
    return ss.str();
}

inline std::wstring MakeErrorMsg(HRESULT hr, const std::wstring &errorMsg)
{
    std::wostringstream ss;
    ss << errorMsg << L" (" << MakeErrorMsg(hr) << L")";
    return ss.str();
}

inline void WriteErrorMsg(const std::wstring &errorMsg)
{
    std::wostringstream ss;
    ss << L"ERROR: " << errorMsg << std::endl;
    OutputDebugStringW(ss.str().c_str());
    std::wcout << ss.str() << std::endl;
}

inline void WriteErrorMsg(HRESULT hr, const std::wstring &errorMsg = L"")
{
    std::wostringstream ss;
    ss << errorMsg << L" (" << MakeErrorMsg(hr) << L")";
    WriteErrorMsg(ss.str());
}

inline void ThrowIfFailed(HRESULT hr, const std::wstring &errorMsg = L"")
{
    if (FAILED(hr))
    {
        throw MakeErrorMsg(hr, errorMsg);
    }
}

inline void ThrowFailure(const std::wstring &errorMsg)
{
    throw errorMsg;
}
@@ -0,0 +1,153 @@ Main.cpp
#include "Common.h"
#include "OutputHelper.h"
#include "ModelBinding.h"
#include "BindingUtilities.h"
#include "Stopwatch.h"
#include "CommandLineArgs.h"
#include <filesystem>

#define CheckHr(expr, errorMsg) hr = (expr); if (FAILED(hr)) { WriteErrorMsg(hr, errorMsg); return 1; }

Profiler<WINML_MODEL_TEST_PERF> g_Profiler;
int g_GarbageRuns = 10;

// Loads, binds, and evaluates the user-specified model and outputs the GPU/CPU and
// wall-clock times (in ms) for each step to the command line.
void EvaluateModel(CommandLineArgs args, std::wstring modelName, OutputHelper * output, LearningModelDeviceKind deviceKind)
{
    Stopwatch timer;
    output->PrintModelInfo(modelName, deviceKind);

    WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
    timer.Click();

    LearningModel model = nullptr;

    try
    {
        model = LearningModel::LoadFromFilePath(args.ModelPath());
    }
    catch (const std::wstring &msg)
    {
        WriteErrorMsg(msg);
        return;
    }
    WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
    timer.Click();
    output->m_clockLoadTime = timer.GetElapsedMilliseconds();

    LearningModelSession session(model, LearningModelDevice(deviceKind));
    LearningModelBinding binding(session);

    // Initialize model input and bind garbage data.
    WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
    timer.Click();
    try
    {
        BindingUtilities::BindGarbageDataToContext(binding, model);
    }
    catch (const std::wstring &msg)
    {
        WriteErrorMsg(msg);
        return;
    }
    timer.Click();
    WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
    output->m_clockBindTime = timer.GetElapsedMilliseconds();

    // Warm-up evaluations that are not included in the measurements.
    for (int i = 0; i < g_GarbageRuns; i++) {
        auto result = session.Evaluate(binding, L"");
    }

    // Measured evaluations.
    for (UINT i = 0; i < args.NumIterations(); i++)
    {
        WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
        timer.Click();
        auto result = session.Evaluate(binding, L"");
        timer.Click();
        WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
        output->m_clockEvalTimes.push_back(timer.GetElapsedMilliseconds());
    }

    output->PrintWallClockTimes(args.NumIterations());
    if (deviceKind == LearningModelDeviceKind::DirectX)
    {
        output->PrintGPUTimes(g_Profiler, args.NumIterations());
    }
    else
    {
        output->PrintCPUTimes(g_Profiler, args.NumIterations());
    }
    g_Profiler.Reset();
}

void EvaluateModelsInDirectory(CommandLineArgs args, OutputHelper * output)
{
    std::wstring folderPath = args.FolderPath();
    for (const auto & entry : std::filesystem::directory_iterator(folderPath))
    {
        std::string path = entry.path().string();
        if (path.find(".onnx") != std::string::npos ||
            path.find(".pb") != std::string::npos)
        {
            std::wstring fileName = entry.path().wstring();
            args.SetModelPath(fileName);
            try
            {
                if (args.UseCPUandGPU() || args.UseGPU())
                {
                    EvaluateModel(args, args.ModelPath(), output, LearningModelDeviceKind::DirectX);
                }
                if (args.UseCPUandGPU() || args.UseCPU())
                {
                    EvaluateModel(args, args.ModelPath(), output, LearningModelDeviceKind::Cpu);
                }
                output->WritePerformanceDataToCSV(g_Profiler, args, fileName);
                output->Reset();
            }
            catch (const std::wstring &msg)
            {
                WriteErrorMsg(msg);
                continue;
            }
        }
    }
}

int main(int argc, char** argv)
{
    CommandLineArgs args;
    OutputHelper output;

    winrt::init_apartment();
    output.PrintHardwareInfo();
    g_Profiler.Enable();

    std::wstring csvFileName = args.CsvFileName();
    if (csvFileName.empty())
    {
        output.SetDefaultCSVFileName();
    }
    else
    {
        output.m_csvFileName = csvFileName;
    }
    if (!args.ModelPath().empty())
    {
        if (args.UseCPUandGPU() || args.UseGPU())
        {
            EvaluateModel(args, args.ModelPath(), &output, LearningModelDeviceKind::DirectX);
        }
        if (args.UseCPUandGPU() || args.UseCPU())
        {
            EvaluateModel(args, args.ModelPath(), &output, LearningModelDeviceKind::Cpu);
        }
        output.WritePerformanceDataToCSV(g_Profiler, args, args.ModelPath());
        output.Reset();
    }
    else if (!args.FolderPath().empty())
    {
        EvaluateModelsInDirectory(args, &output);
    }
    return 0;
}
@@ -0,0 +1,95 @@ ModelBinding.h
#pragma once
#include "Common.h"

// Data storage for a model input or output variable.
template <typename T>
class ModelBinding
{
public:
    ModelBinding(winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor variableDesc) : m_bindingDesc(variableDesc)
    {
        UINT numElements = 0;
        if (variableDesc.Kind() == winrt::Windows::AI::MachineLearning::LearningModelFeatureKind::Tensor)
        {
            InitTensorBinding(variableDesc, numElements);
        }
        else
        {
            ThrowFailure(L"ModelBinding: Binding feature type not implemented");
        }
    }

    winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor GetDesc()
    {
        return m_bindingDesc;
    }

    UINT GetNumElements() const
    {
        return m_numElements;
    }

    UINT GetElementSize() const
    {
        return static_cast<UINT>(sizeof(T)); // element size in bytes
    }

    std::vector<INT64> GetShapeBuffer()
    {
        return m_shapeBuffer;
    }

    void* GetData()
    {
        return m_dataBuffer.data();
    }

    std::vector<T> GetDataBuffer()
    {
        return m_dataBuffer;
    }

    size_t GetDataBufferSize()
    {
        return m_dataBuffer.size();
    }

private:
    // Computes the element count and shape buffer from the descriptor's shape.
    // Free (unknown) dimensions are reported as <= 0 and are treated as size 1 here.
    void InitNumElementsAndShape(const winrt::Windows::Foundation::Collections::IVectorView<int64_t>& shape, UINT numDimensions, UINT numElements)
    {
        int unknownDim = -1;
        UINT numKnownElements = 1;
        for (UINT dim = 0; dim < numDimensions; dim++)
        {
            INT64 dimSize = shape.GetAt(dim);

            if (dimSize <= 0)
            {
                if (unknownDim == -1)
                {
                    dimSize = 1;
                }
            }
            else
            {
                numKnownElements *= static_cast<UINT>(dimSize);
            }

            m_shapeBuffer.push_back(dimSize);
        }
        m_numElements = numKnownElements;
    }

    void InitTensorBinding(winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor descriptor, UINT numElements)
    {
        auto tensorDescriptor = descriptor.as<winrt::Windows::AI::MachineLearning::TensorFeatureDescriptor>();
        InitNumElementsAndShape(tensorDescriptor.Shape(), tensorDescriptor.Shape().Size(), 1);
        m_dataBuffer.resize(m_numElements);
    }

    winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor m_bindingDesc;
    std::vector<INT64> m_shapeBuffer;
    UINT m_numElements = 0;
    std::vector<T> m_dataBuffer;
};
@@ -0,0 +1,221 @@ OutputHelper.h
#pragma once
#include "Common.h"
#include <time.h>
#include "CommandLineArgs.h"
#include <fstream>
#include <ctime>
#include <locale>
#include <utility>
#include <codecvt>
#include <iomanip>
#include <windows.h>
#include <stdio.h>

using namespace winrt::Windows::AI::MachineLearning;

// Stores performance information and handles output to the command line and CSV files.
class OutputHelper
{
public:
    OutputHelper() {}

    void PrintWallClockTimes(UINT iterations)
    {
        double totalEvalTime = std::accumulate(m_clockEvalTimes.begin(), m_clockEvalTimes.end(), 0.0);
        m_clockEvalTime = totalEvalTime / (double)iterations;

        std::cout << "Wall-clock Time Averages (iterations = " << iterations << "):" << std::endl;
        std::cout << " Load: " << m_clockLoadTime << " ms" << std::endl;
        std::cout << " Bind: " << m_clockBindTime << " ms" << std::endl;
        std::cout << " Evaluate: " << m_clockEvalTime << " ms" << std::endl;
        std::cout << " Total time: " << m_clockLoadTime + m_clockBindTime + m_clockEvalTime << " ms" << std::endl;
        std::cout << std::endl;
    }

    void PrintCPUTimes(Profiler<WINML_MODEL_TEST_PERF> &profiler, UINT iterations)
    {
        m_CPULoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
        m_CPUBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
        m_CPUEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
        // Report the working-set delta during evaluation so the value matches the "mb" label below.
        m_CPUEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);

        std::cout << "CPU Time Averages (iterations = " << iterations << "):" << std::endl;
        std::cout << " Load: " << m_CPULoadTime << " ms" << std::endl;
        std::cout << " Bind: " << m_CPUBindTime << " ms" << std::endl;
        std::cout << " Evaluate: " << m_CPUEvalTime << " ms" << std::endl;
        std::cout << " Total time: " << m_CPULoadTime + m_CPUBindTime + m_CPUEvalTime << " ms" << std::endl;
        std::cout << " Evaluate memory usage: " << m_CPUEvalMemoryUsage << " mb" << std::endl;
        std::cout << std::endl;
    }

    void PrintGPUTimes(Profiler<WINML_MODEL_TEST_PERF> &profiler, UINT iterations)
    {
        m_GPULoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
        m_GPUBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
        m_GPUEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
        // Report the dedicated GPU memory delta during evaluation so the value matches the "mb" label below.
        m_GPUEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);

        std::cout << "GPU Time Averages (iterations = " << iterations << "):" << std::endl;
        std::cout << " Load: " << m_GPULoadTime << " ms" << std::endl;
        std::cout << " Bind: " << m_GPUBindTime << " ms" << std::endl;
        std::cout << " Evaluate: " << m_GPUEvalTime << " ms" << std::endl;
        std::cout << " Total time: " << m_GPULoadTime + m_GPUBindTime + m_GPUEvalTime << " ms" << std::endl;
        std::cout << " Evaluate memory usage: " << m_GPUEvalMemoryUsage << " mb" << std::endl;
        std::cout << std::endl;
    }

    void PrintModelInfo(std::wstring modelName, LearningModelDeviceKind deviceKind)
    {
        std::wstring device = deviceKind == LearningModelDeviceKind::Cpu ? L" [CPU]" : L" [GPU]";
        std::wcout << modelName << device << std::endl;
        std::cout << "=================================================================" << std::endl;
        std::cout << std::endl;
    }

    void PrintHardwareInfo()
    {
        std::cout << "WinML Model Runner" << std::endl;

        com_ptr<IDXGIFactory6> factory;
        if (SUCCEEDED(CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void())))
        {
            com_ptr<IDXGIAdapter> adapter;
            if (SUCCEEDED(factory->EnumAdapters(0, adapter.put())))
            {
                DXGI_ADAPTER_DESC description;
                if (SUCCEEDED(adapter->GetDesc(&description)))
                {
                    std::wcout << L"GPU: " << description.Description << std::endl;
                    std::cout << std::endl;
                }
            }
        }
    }

    void SetDefaultCSVFileName()
    {
        auto time = std::time(nullptr);
        struct tm localTime;
        localtime_s(&localTime, &time);

        std::ostringstream oss;
        oss << std::put_time(&localTime, "%Y-%m-%d");
        std::string fileName = "WinML Model Run [" + oss.str() + "].csv";
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
        m_csvFileName = converter.from_bytes(fileName);
    }

    void WritePerformanceDataToCSV(Profiler<WINML_MODEL_TEST_PERF> &profiler, CommandLineArgs args, std::wstring model)
    {
        if (m_csvFileName.length() > 0)
        {
            // Check whether the CSV already has a header row.
            bool bNewFile = false;
            std::ifstream fin;
            fin.open(m_csvFileName);
            std::filebuf* outbuf = fin.rdbuf();
            if (EOF == outbuf->sbumpc())
            {
                bNewFile = true;
            }
            fin.close();

            std::ofstream fout;
            fout.open(m_csvFileName, std::ios_base::app);

            std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
            std::string modelName = converter.to_bytes(model);

            if (bNewFile)
            {
                fout << "Model Name" << ","
                     << "Iterations" << ",";

                if (args.UseCPUandGPU() || args.UseCPU())
                {
                    fout << "CPU Load (ms)" << ","
                         << "CPU Bind (ms)" << ","
                         << "CPU Evaluate (ms)" << ","
                         << "CPU total time (ms)" << ","
                         << "CPU Usage (Evaluate) (mb)" << ",";
                }
                if (args.UseCPUandGPU() || args.UseGPU())
                {
                    fout << "GPU Load (ms)" << ","
                         << "GPU Bind (ms)" << ","
                         << "GPU Evaluate (ms)" << ","
                         << "GPU total time (ms)" << ","
                         << "GPU Usage (Evaluate) (mb)" << ",";
                }

                fout << "Wall-clock Load (ms)" << ","
                     << "Wall-clock Bind (ms)" << ","
                     << "Wall-clock Evaluate (ms)" << ","
                     << "Wall-clock total time (ms)" << ","
                     << std::endl;
            }

            fout << modelName << "," << args.NumIterations() << ",";

            if (args.UseCPUandGPU() || args.UseCPU())
            {
                fout << m_CPULoadTime << ","
                     << m_CPUBindTime << ","
                     << m_CPUEvalTime << ","
                     << m_CPULoadTime + m_CPUBindTime + m_CPUEvalTime << ","
                     << m_CPUEvalMemoryUsage << ",";
            }
            if (args.UseCPUandGPU() || args.UseGPU())
            {
                fout << m_GPULoadTime << ","
                     << m_GPUBindTime << ","
                     << m_GPUEvalTime << ","
                     << m_GPULoadTime + m_GPUBindTime + m_GPUEvalTime << ","
                     << m_GPUEvalMemoryUsage << ",";
            }

            fout << m_clockLoadTime << ","
                 << m_clockBindTime << ","
                 << m_clockEvalTime << ","
                 << m_clockLoadTime + m_clockBindTime + m_clockEvalTime << ","
                 << std::endl;
            fout.close();
        }
    }

    void Reset()
    {
        m_GPULoadTime = 0;
        m_GPUBindTime = 0;
        m_GPUEvalTime = 0;
        m_GPUEvalMemoryUsage = 0;

        m_CPULoadTime = 0;
        m_CPUBindTime = 0;
        m_CPUEvalTime = 0;
        m_CPUEvalMemoryUsage = 0;

        m_clockLoadTime = 0;
        m_clockBindTime = 0;
        m_clockEvalTime = 0;
        // Clear the per-iteration samples so the next model's average is not skewed.
        m_clockEvalTimes.clear();
    }

    double m_clockLoadTime = 0;
    double m_clockBindTime = 0;
    std::vector<double> m_clockEvalTimes;

    std::wstring m_csvFileName;

private:
    double m_GPULoadTime = 0;
    double m_GPUBindTime = 0;
    double m_GPUEvalTime = 0;
    double m_GPUEvalMemoryUsage = 0;

    double m_CPULoadTime = 0;
    double m_CPUBindTime = 0;
    double m_CPUEvalTime = 0;
    double m_CPUEvalMemoryUsage = 0;

    double m_clockEvalTime = 0;
};
@@ -0,0 +1,16 @@ Stopwatch.cpp
#include <Windows.h>
#include "Stopwatch.h"

Stopwatch::Stopwatch()
{
    QueryPerformanceFrequency(&m_frequency);
    // Establish a baseline so the first Click() measures from construction time.
    QueryPerformanceCounter(&m_lastClickTime);
}

void Stopwatch::Click()
{
    LARGE_INTEGER currentTime;
    QueryPerformanceCounter(&currentTime);
    auto delta = static_cast<double>(currentTime.QuadPart - m_lastClickTime.QuadPart);
    m_elapsedMilliseconds = delta / (static_cast<double>(m_frequency.QuadPart) / 1000.0);
    m_lastClickTime = currentTime;
}
@@ -0,0 +1,20 @@ Stopwatch.h
#pragma once

class Stopwatch
{
public:
    Stopwatch();

    void Click();

    // Time elapsed between last two clicks.
    inline double GetElapsedMilliseconds() const
    {
        return m_elapsedMilliseconds;
    }

private:
    LARGE_INTEGER m_lastClickTime;
    LARGE_INTEGER m_frequency;
    double m_elapsedMilliseconds = 0.0;
};
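
// Usage sketch: Click() marks a point in time, so bracketing the work of interest
// with two Click() calls makes GetElapsedMilliseconds() return the wall-clock time
// of that work. DoWork() below is a hypothetical placeholder for the measured code.
//
//   Stopwatch timer;
//   timer.Click();                               // start point
//   DoWork();                                    // measured work (placeholder)
//   timer.Click();                               // end point
//   double ms = timer.GetElapsedMilliseconds();  // elapsed time between the two clicks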
@@ -0,0 +1,617 @@ TimerHelper.h
#pragma once

#include <windows.h>
#include <cmath>
#include <cfloat>
#include <cstring>
#ifndef DISABLE_GPU_COUNTERS
#include <Pdh.h>
#include <PdhMsg.h>
#endif
#include <psapi.h>
#include <string>
#include <vector>

#define TIMER_SLOT_SIZE (128)
#define CONVERT_100NS_TO_SECOND(x) ((x) * 0.0000001)
#define BYTE_TO_MB(x) ((x)/(1024.0*1024.0))

// A stopwatch that measures the time passed (in seconds) between a Stop() call and the most recent Start() call before it.
class Timer
{
public:
    void Start()
    {
        LARGE_INTEGER t;
        QueryPerformanceCounter(&t);
        m_startTime = static_cast<double>(t.QuadPart);
    }

    double Stop()
    {
        LARGE_INTEGER stopTime;
        QueryPerformanceCounter(&stopTime);
        double t = static_cast<double>(stopTime.QuadPart) - m_startTime;
        LARGE_INTEGER tps;
        QueryPerformanceFrequency(&tps);
        return t / static_cast<double>(tps.QuadPart);
    }

private:
    double m_startTime;
};

class CpuPerfCounter
{
public:
    CpuPerfCounter()
    {
        Reset();
    }

    ~CpuPerfCounter() {}

    void Reset()
    {
        SYSTEM_INFO sysInfo = { 0 };
        GetSystemInfo(&sysInfo);

        m_startKernelTime = { 0 };
        m_startUserTime = { 0 };
        m_numProcessors = sysInfo.dwNumberOfProcessors;
        m_procHandle = GetCurrentProcess();
        m_pid = GetCurrentProcessId();
        m_previousStartCallFailed = true;
        m_processTime = 0;
        m_startPageFaultCount = 0;
        m_startPagefileUsage = 0;
        m_startPeakPagefileUsage = 0;
        m_startWorkingSetSize = 0;
        m_startPeakWorkingSetSize = 0;
        m_deltaPageFaultCount = 0;
        m_deltaPagefileUsage = 0;
        m_deltaPeakPagefileUsage = 0;
        m_deltaWorkingSetSize = 0;
        m_deltaPeakWorkingSetSize = 0;
    }

    void Start()
    {
        FILETIME ftIgnore, ftKernel, ftUser;

        if (!GetProcessTimes(m_procHandle, &ftIgnore, &ftIgnore, &ftKernel, &ftUser) ||
            !GetProcessMemoryCounters(m_pid, m_startPageFaultCount, m_startPagefileUsage, m_startPeakPagefileUsage, m_startWorkingSetSize, m_startPeakWorkingSetSize))
        {
            m_previousStartCallFailed = true;
        }
        else
        {
            memcpy(&m_startKernelTime, &ftKernel, sizeof(FILETIME));
            memcpy(&m_startUserTime, &ftUser, sizeof(FILETIME));
            m_previousStartCallFailed = false;
        }
    }

    void Stop()
    {
        FILETIME ftIgnore, ftKernel, ftUser;
        ULARGE_INTEGER stopKernelTime, stopUserTime;
        ULONG stopPageFaultCount = 0;
        SIZE_T stopPagefileUsage = 0;
        SIZE_T stopPeakPagefileUsage = 0;
        SIZE_T stopWorkingSetSize = 0;
        SIZE_T stopPeakWorkingSetSize = 0;

        if (m_previousStartCallFailed ||
            m_numProcessors == 0 ||
            !GetProcessTimes(m_procHandle, &ftIgnore, &ftIgnore, &ftKernel, &ftUser) ||
            !GetProcessMemoryCounters(m_pid, stopPageFaultCount, stopPagefileUsage, stopPeakPagefileUsage, stopWorkingSetSize, stopPeakWorkingSetSize))
        {
            return;
        }

        memcpy(&stopKernelTime, &ftKernel, sizeof(FILETIME));
        memcpy(&stopUserTime, &ftUser, sizeof(FILETIME));
        m_processTime = CONVERT_100NS_TO_SECOND((stopKernelTime.QuadPart - m_startKernelTime.QuadPart) + (stopUserTime.QuadPart - m_startUserTime.QuadPart)) / m_numProcessors;

        m_deltaPageFaultCount = stopPageFaultCount - m_startPageFaultCount;
        m_deltaPagefileUsage = (double)BYTE_TO_MB((double)stopPagefileUsage - (double)m_startPagefileUsage);
        m_deltaPeakPagefileUsage = (double)BYTE_TO_MB((double)stopPeakPagefileUsage - (double)m_startPeakPagefileUsage);
        m_deltaWorkingSetSize = (double)BYTE_TO_MB((double)stopWorkingSetSize - (double)m_startWorkingSetSize);
        m_deltaPeakWorkingSetSize = (double)BYTE_TO_MB((double)stopPeakWorkingSetSize - (double)m_startPeakWorkingSetSize);
    }

    double GetProcessTime() { return m_processTime; }
    ULONG GetDeltaPageFaultCount() { return m_deltaPageFaultCount; }
    double GetDeltaPageFileUsage() { return m_deltaPagefileUsage; }
    double GetDeltaPeakPageFileUsage() { return m_deltaPeakPagefileUsage; }
    double GetDeltaWorkingSetUsage() { return m_deltaWorkingSetSize; }
    double GetDeltaPeakWorkingSetUsage() { return m_deltaPeakWorkingSetSize; }

private:

    bool GetProcessMemoryCounters(DWORD pid, ULONG& pageFaultCount, SIZE_T& pageFileUsage, SIZE_T& peakPageFileUsage, SIZE_T& workingSetSize, SIZE_T& peakWorkingSetSize)
    {
        HANDLE hProcess = NULL;

        hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid);
        if (NULL == hProcess)
            return false;

        PROCESS_MEMORY_COUNTERS pmc = { 0 };

        bool result = GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc));
        if (result)
        {
            pageFaultCount = pmc.PageFaultCount;
            pageFileUsage = pmc.PagefileUsage;
            peakPageFileUsage = pmc.PeakPagefileUsage;
            workingSetSize = pmc.WorkingSetSize;
            peakWorkingSetSize = pmc.PeakWorkingSetSize;
        }

        CloseHandle(hProcess);

        return result;
    }

    ULARGE_INTEGER m_startKernelTime;
    ULARGE_INTEGER m_startUserTime;
    UINT m_numProcessors;
    HANDLE m_procHandle;
    DWORD m_pid;
    bool m_previousStartCallFailed;
    double m_processTime;             // in seconds
    ULONG m_startPageFaultCount;
    SIZE_T m_startPagefileUsage;      // in bytes
    SIZE_T m_startPeakPagefileUsage;  // in bytes
    SIZE_T m_startWorkingSetSize;     // in bytes
    SIZE_T m_startPeakWorkingSetSize; // in bytes
    ULONG m_deltaPageFaultCount;
    double m_deltaPagefileUsage;      // in MB
    double m_deltaPeakPagefileUsage;  // in MB
    double m_deltaWorkingSetSize;     // in MB
    double m_deltaPeakWorkingSetSize; // in MB
};
#ifndef DISABLE_GPU_COUNTERS

class GpuPerfCounter
{
public:
    GpuPerfCounter() :
        m_hPDH(NULL),
        m_pfnPdhOpenQuery(NULL),
        m_pfnPdhAddCounter(NULL),
        m_pfnPdhCollectQueryData(NULL),
        m_pfnPdhGetFormattedCounterArray(NULL),
        m_pfnPdhGetFormattedCounterValue(NULL),
        m_pfnPdhCloseQuery(NULL),
        m_query(NULL)
    {
        //#ifdef DISABLE_LOADLIBRARY
        m_hPDH = LoadLibraryEx(L"pdh.dll", NULL, 0);
        //#endif
        if (m_hPDH != NULL)
        {
            m_pfnPdhOpenQuery = (PFNPdhOpenQuery)GetProcAddress(m_hPDH, "PdhOpenQueryW");
            m_pfnPdhAddCounter = (PFNPdhAddCounter)GetProcAddress(m_hPDH, "PdhAddCounterW");
            m_pfnPdhCollectQueryData = (PFNPdhCollectQueryData)GetProcAddress(m_hPDH, "PdhCollectQueryData");
            m_pfnPdhGetFormattedCounterArray = (PFNPdhGetFormattedCounterArray)GetProcAddress(m_hPDH, "PdhGetFormattedCounterArrayW");
            m_pfnPdhGetFormattedCounterValue = (PFNPdhGetFormattedCounterValue)GetProcAddress(m_hPDH, "PdhGetFormattedCounterValue");
            m_pfnPdhCloseQuery = (PFNPdhCloseQuery)GetProcAddress(m_hPDH, "PdhCloseQuery");
        }

        Reset();
    }
    ~GpuPerfCounter()
    {
        if (m_query)
        {
            CloseQuery(m_query);
            m_query = NULL;
        }

        if (m_hPDH)
        {
            FreeLibrary(m_hPDH);
            m_hPDH = NULL;
        }
    }

    void Reset()
    {
        m_gpuUsage = 0;
        m_deltaGpuDedicatedMemory = 0;
        m_deltaGpuSharedMemory = 0;

        // Set up the PDH performance query
        std::wstring pidStr = std::to_wstring(GetCurrentProcessId());
        std::wstring gpuUsageQueryStr = L"\\GPU Engine(pid_*_*)\\Utilization Percentage";
        std::wstring gpuDedicatedMemQueryStr = L"\\GPU Process Memory(pid_*_*)\\Dedicated Usage";
        std::wstring gpuSharedMemQueryStr = L"\\GPU Process Memory(pid_*_*)\\Shared Usage";
        gpuUsageQueryStr.replace(gpuUsageQueryStr.find('*'), 1, pidStr);
        gpuDedicatedMemQueryStr.replace(gpuDedicatedMemQueryStr.find('*'), 1, pidStr);
        gpuSharedMemQueryStr.replace(gpuSharedMemQueryStr.find('*'), 1, pidStr);

        // Open the query
        if (m_query) CloseQuery(m_query);
        m_query = NULL;
        OpenQuery(NULL, NULL, &m_query);
        AddCounter(m_query, gpuUsageQueryStr.c_str(), NULL, &m_gpuUsageCounter);
        AddCounter(m_query, gpuDedicatedMemQueryStr.c_str(), NULL, &m_gpuDedicatedMemUsageCounter);
        AddCounter(m_query, gpuSharedMemQueryStr.c_str(), NULL, &m_gpuSharedMemUsageCounter);
    }

    void Start()
    {
        PDH_FMT_COUNTERVALUE gpuDedicatedMemUsageCounterValue = {};
        PDH_FMT_COUNTERVALUE gpuSharedMemUsageCounterValue = {};
        PDH_STATUS status = S_OK;

        // Usage-rate counters require two samples: take the first here and the second in Stop().
        CollectQueryData(m_query);

        // GPU dedicated memory
        status = GetFormattedCounterValue(m_gpuDedicatedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuDedicatedMemUsageCounterValue);
        m_startGpuDedicatedMemory = (ERROR_SUCCESS == status) ? (double)BYTE_TO_MB(gpuDedicatedMemUsageCounterValue.largeValue) : m_startGpuDedicatedMemory;

        // GPU shared memory
        status = GetFormattedCounterValue(m_gpuSharedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuSharedMemUsageCounterValue);
        m_startGpuSharedMemory = (ERROR_SUCCESS == status) ? (double)BYTE_TO_MB(gpuSharedMemUsageCounterValue.largeValue) : m_startGpuSharedMemory;
    }

    void Stop()
    {
        PDH_FMT_COUNTERVALUE_ITEM* gpuUsageCounterValue = nullptr;
        PDH_FMT_COUNTERVALUE gpuDedicatedMemUsageCounterValue = {};
        PDH_FMT_COUNTERVALUE gpuSharedMemUsageCounterValue = {};
        DWORD bufferSize = 0;
        DWORD itemCount = 0;
        PDH_STATUS status = S_OK;

        // Query the GPU usage.
        // Depending on the IHV, compute-shader usage can be counted as either 3D or compute engine usage.
        // Here we simply pick the maximum usage across all engine types to see whether the bottleneck is the GPU.
        // Task Manager uses the same approach to display GPU usage.
        status = CollectQueryData(m_query);
        if (S_OK != status && PDH_NO_DATA != status)
            return;

        status = GetFormattedCounterArray(m_gpuUsageCounter, PDH_FMT_DOUBLE, &bufferSize, &itemCount, gpuUsageCounterValue);
        if (PDH_MORE_DATA != status)
            return;

        gpuUsageCounterValue = (PDH_FMT_COUNTERVALUE_ITEM *)malloc(bufferSize);
        if (gpuUsageCounterValue != nullptr)
        {
            status = GetFormattedCounterArray(m_gpuUsageCounter, PDH_FMT_DOUBLE, &bufferSize, &itemCount, gpuUsageCounterValue);
            if (ERROR_SUCCESS == status)
            {
                double maxValue = 0;
                for (size_t i = 0; i < itemCount; ++i)
                {
                    maxValue = (gpuUsageCounterValue[i].FmtValue.doubleValue > maxValue) ? gpuUsageCounterValue[i].FmtValue.doubleValue : maxValue;
                }
                m_gpuUsage = maxValue;
            }
        }

        free(gpuUsageCounterValue);
        gpuUsageCounterValue = NULL;
        bufferSize = 0;
        itemCount = 0;

        double stopGpuDedicatedMemory; // in MB
        double stopGpuSharedMemory; // in MB

        // GPU dedicated memory delta. Don't update the value if the counter doesn't return a value correctly.
        status = GetFormattedCounterValue(m_gpuDedicatedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuDedicatedMemUsageCounterValue);
        if (ERROR_SUCCESS == status)
        {
            stopGpuDedicatedMemory = (double)BYTE_TO_MB(gpuDedicatedMemUsageCounterValue.largeValue);
            m_deltaGpuDedicatedMemory = stopGpuDedicatedMemory - m_startGpuDedicatedMemory;
        }

        // GPU shared memory delta. Don't update the value if the counter doesn't return a value correctly.
        status = GetFormattedCounterValue(m_gpuSharedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuSharedMemUsageCounterValue);
        if (ERROR_SUCCESS == status)
        {
            stopGpuSharedMemory = (double)BYTE_TO_MB(gpuSharedMemUsageCounterValue.largeValue);
            m_deltaGpuSharedMemory = stopGpuSharedMemory - m_startGpuSharedMemory;
        }
    }

    double GetGpuUsage() const { return m_gpuUsage; }
    double GetDedicatedMemory() const { return m_deltaGpuDedicatedMemory; }
    double GetSharedMemory() const { return m_deltaGpuSharedMemory; }

private:
    // Pdh function prototypes
    typedef PDH_STATUS(WINAPI *PFNPdhOpenQuery)(_In_opt_ LPCWSTR szDataSource, _In_ DWORD_PTR dwUserData, _Out_ PDH_HQUERY * phQuery);
    typedef PDH_STATUS(WINAPI *PFNPdhAddCounter)(_In_ PDH_HQUERY hQuery, _In_ LPCWSTR szFullCounterPath, _In_ DWORD_PTR dwUserData, _Out_ PDH_HCOUNTER * phCounter);
    typedef PDH_STATUS(WINAPI *PFNPdhCollectQueryData)(_Inout_ PDH_HQUERY hQuery);
    typedef PDH_STATUS(WINAPI *PFNPdhGetFormattedCounterArray)(_In_ PDH_HCOUNTER hCounter, _In_ DWORD dwFormat, _Inout_ LPDWORD lpdwBufferSize, _Out_ LPDWORD lpdwItemCount, _Out_writes_bytes_opt_(*lpdwBufferSize) PPDH_FMT_COUNTERVALUE_ITEM_W ItemBuffer);
    typedef PDH_STATUS(WINAPI *PFNPdhGetFormattedCounterValue)(_In_ PDH_HCOUNTER hCounter, _In_ DWORD dwFormat, _Out_opt_ LPDWORD lpdwType, _Out_ PPDH_FMT_COUNTERVALUE pValue);
    typedef PDH_STATUS(WINAPI *PFNPdhCloseQuery)(_Inout_ PDH_HQUERY hQuery);

    PDH_STATUS OpenQuery(LPCWSTR szDataSource, DWORD_PTR dwUserData, PDH_HQUERY * phQuery)
    {
        return (m_pfnPdhOpenQuery) ? m_pfnPdhOpenQuery(szDataSource, dwUserData, phQuery) : ERROR_MOD_NOT_FOUND;
    }
    PDH_STATUS AddCounter(PDH_HQUERY hQuery, LPCWSTR szFullCounterPath, DWORD_PTR dwUserData, PDH_HCOUNTER * phCounter)
    {
        return (m_pfnPdhAddCounter) ? m_pfnPdhAddCounter(hQuery, szFullCounterPath, dwUserData, phCounter) : ERROR_MOD_NOT_FOUND;
    }
    PDH_STATUS CollectQueryData(PDH_HQUERY hQuery)
    {
        return (m_pfnPdhCollectQueryData) ? m_pfnPdhCollectQueryData(hQuery) : ERROR_MOD_NOT_FOUND;
    }
    PDH_STATUS GetFormattedCounterArray(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwBufferSize, LPDWORD lpdwItemCount, PPDH_FMT_COUNTERVALUE_ITEM_W ItemBuffer)
    {
        return (m_pfnPdhGetFormattedCounterArray) ? m_pfnPdhGetFormattedCounterArray(hCounter, dwFormat, lpdwBufferSize, lpdwItemCount, ItemBuffer) : ERROR_MOD_NOT_FOUND;
    }
    PDH_STATUS GetFormattedCounterValue(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwType, PPDH_FMT_COUNTERVALUE pValue)
    {
        return (m_pfnPdhGetFormattedCounterValue) ? m_pfnPdhGetFormattedCounterValue(hCounter, dwFormat, lpdwType, pValue) : ERROR_MOD_NOT_FOUND;
    }
    PDH_STATUS CloseQuery(PDH_HQUERY hQuery)
    {
        return (m_pfnPdhCloseQuery) ? m_pfnPdhCloseQuery(hQuery) : ERROR_MOD_NOT_FOUND;
    }

    // PDH performance query
    HMODULE m_hPDH;
    PFNPdhOpenQuery m_pfnPdhOpenQuery;
    PFNPdhAddCounter m_pfnPdhAddCounter;
    PFNPdhCollectQueryData m_pfnPdhCollectQueryData;
    PFNPdhGetFormattedCounterArray m_pfnPdhGetFormattedCounterArray;
    PFNPdhGetFormattedCounterValue m_pfnPdhGetFormattedCounterValue;
    PFNPdhCloseQuery m_pfnPdhCloseQuery;
    HQUERY m_query;
    HCOUNTER m_gpuUsageCounter;
    HCOUNTER m_gpuDedicatedMemUsageCounter;
    HCOUNTER m_gpuSharedMemUsageCounter;
    // Process info
    DWORD m_pid;
    // Data
    double m_gpuUsage;
    double m_startGpuDedicatedMemory; // in MB
    double m_startGpuSharedMemory;    // in MB
    double m_deltaGpuDedicatedMemory; // in MB
    double m_deltaGpuSharedMemory;    // in MB
};

#endif
typedef enum CounterType
{
    TIMER = 0,
    CPU_USAGE,
    PAGE_FAULT_COUNT,
    PAGE_FILE_USAGE,
    PEAK_PAGE_FILE_USAGE,
    WORKING_SET_USAGE,
    PEAK_WORKING_SET_USAGE,
    GPU_USAGE,
    GPU_DEDICATED_MEM_USAGE,
    GPU_SHARED_MEM_USAGE,
    TYPE_COUNT
} CounterType;

const static std::vector<std::wstring> CounterTypeName =
{
    L"TIMER",
    L"CPU USAGE",
    L"PAGE FAULT COUNT",
    L"PAGE FILE USAGE",
    L"PEAK PAGE FILE USAGE",
    L"WORKING SET USAGE",
    L"PEAK WORKING SET USAGE",
    L"GPU USAGE",
    L"GPU DEDICATED MEM USAGE",
    L"GPU SHARED MEM USAGE"
};

// A statistics helper for the Timer/CpuPerfCounter/GpuPerfCounter classes.
// It keeps the latest TIMER_SLOT_SIZE measured samples in a ring buffer.
// The statistics functions (e.g. GetVariance) assume the data always starts at index 0 of the buffer.
class PerfCounterStatistics
{
public:
    PerfCounterStatistics() : m_pos(0), m_bBufferFull(false), m_bDisabled(true)
    {
        // Zero the ring buffers so the first Stop() works with defined values.
        for (int i = 0; i < CounterType::TYPE_COUNT; ++i)
        {
            m_data[i].Reset();
        }
    }

    void Enable()
    {
        m_bDisabled = false;
    }

    void Disable()
    {
        m_bDisabled = true;
    }

    void Reset()
    {
        if (m_bDisabled)
            return;

        m_pos = 0;
        m_bBufferFull = false;
        m_cpuCounter.Reset();
#ifndef DISABLE_GPU_COUNTERS
        m_gpuCounter.Reset();
#endif
        for (int i = 0; i < CounterType::TYPE_COUNT; ++i)
        {
            m_data[i].Reset();
        }
    }

    void Start()
    {
        if (m_bDisabled)
            return;

        m_timer.Start();
        m_cpuCounter.Start();
#ifndef DISABLE_GPU_COUNTERS
        m_gpuCounter.Start();
#endif
    }

    void Stop()
    {
        if (m_bDisabled)
            return;

        double counterValue[CounterType::TYPE_COUNT];

        // Query the counters
        double time = m_timer.Stop();
        m_cpuCounter.Stop();
#ifndef DISABLE_GPU_COUNTERS
        m_gpuCounter.Stop();
#endif

        // Get the counter values
        counterValue[CounterType::TIMER] = time;
        counterValue[CounterType::CPU_USAGE] = 100.0 * m_cpuCounter.GetProcessTime() / time;
        counterValue[CounterType::PAGE_FAULT_COUNT] = m_cpuCounter.GetDeltaPageFaultCount();
        counterValue[CounterType::PAGE_FILE_USAGE] = m_cpuCounter.GetDeltaPageFileUsage();
        counterValue[CounterType::PEAK_PAGE_FILE_USAGE] = m_cpuCounter.GetDeltaPeakPageFileUsage();
        counterValue[CounterType::WORKING_SET_USAGE] = m_cpuCounter.GetDeltaWorkingSetUsage();
        counterValue[CounterType::PEAK_WORKING_SET_USAGE] = m_cpuCounter.GetDeltaPeakWorkingSetUsage();
#ifndef DISABLE_GPU_COUNTERS
        counterValue[CounterType::GPU_USAGE] = m_gpuCounter.GetGpuUsage();
        counterValue[CounterType::GPU_DEDICATED_MEM_USAGE] = m_gpuCounter.GetDedicatedMemory();
        counterValue[CounterType::GPU_SHARED_MEM_USAGE] = m_gpuCounter.GetSharedMemory();
#endif
        // Update the data blocks
        for (int i = 0; i < CounterType::TYPE_COUNT; ++i)
        {
            m_data[i].total = m_data[i].total - m_data[i].measured[m_pos] + counterValue[i];
            m_data[i].measured[m_pos] = counterValue[i];
            m_data[i].max = (counterValue[i] > m_data[i].max) ? counterValue[i] : m_data[i].max;
            m_data[i].min = (counterValue[i] < m_data[i].min) ? counterValue[i] : m_data[i].min;
        }

        // Update the buffer index
        if (m_pos + 1 >= TIMER_SLOT_SIZE)
        {
            m_pos = 0;
            m_bBufferFull = true;
        }
        else
        {
            ++m_pos;
        }
    }

    int GetCount() const { return (m_bBufferFull) ? TIMER_SLOT_SIZE : m_pos; }
    double GetAverage(CounterType t) const { return (m_bDisabled) ? 0 : m_data[t].total / GetCount(); }
    double GetMin(CounterType t) const { return (m_bDisabled) ? 0 : m_data[t].min; }
    double GetMax(CounterType t) const { return (m_bDisabled) ? 0 : m_data[t].max; }
    double GetValues(CounterType t, int index) const { return (m_bDisabled) ? 0 : m_data[t].measured[index]; }
    double GetStdev(CounterType t) const { return (m_bDisabled) ? 0 : sqrt(GetVariance(t)); }
    double GetVariance(CounterType t) const
    {
        if (m_bDisabled)
            return 0;

        int count = GetCount();
        double average = m_data[t].total / count;
        double var = 0;
        for (int i = 0; i < count; ++i)
        {
            var += (m_data[t].measured[i] - average) * (m_data[t].measured[i] - average);
        }
        return var / count;
    }

private:
    struct DataBlock
    {
        void Reset()
        {
            max = 0;
            min = DBL_MAX;
            total = 0;
            memset(measured, 0, sizeof(double)*TIMER_SLOT_SIZE);
        }

        double max;
        double min;
        double total;
        double measured[TIMER_SLOT_SIZE];
    };

    int m_pos;
    bool m_bBufferFull;
    bool m_bDisabled;

    Timer m_timer;
    CpuPerfCounter m_cpuCounter;
#ifndef DISABLE_GPU_COUNTERS
    GpuPerfCounter m_gpuCounter;
#endif
    DataBlock m_data[CounterType::TYPE_COUNT];
};

// A class that wraps up multiple PerfCounterStatistics objects.
// To create a profiler, define the intervals in an enum (ending with COUNT) and use it to create the profiler object.
// See an example in engine/test/Model/ModelTest.cpp
template<typename T>
class Profiler
{
public:
    void Reset(int begin, int end)
    {
        for (int i = begin; i < end; ++i)
        {
            m_perfCounterStat[i].Reset();
        }
    }

    void Reset()
    {
        Reset(0, T::COUNT);
    }

    PerfCounterStatistics& GetCounter(int t)
    {
        return m_perfCounterStat[t];
    }

    PerfCounterStatistics& operator [] (int t)
    {
        return m_perfCounterStat[t];
    }

    void Enable()
    {
        for (int i = 0; i < T::COUNT; ++i)
        {
            m_perfCounterStat[i].Enable();
        }
    }

    void Disable()
    {
        for (int i = 0; i < T::COUNT; ++i)
        {
            m_perfCounterStat[i].Disable();
        }
    }

private:
    PerfCounterStatistics m_perfCounterStat[T::COUNT];
};

#define WINML_PROFILING

#ifdef WINML_PROFILING
#define WINML_PROFILING_START(profiler, interval) profiler[interval].Start()
#define WINML_PROFILING_STOP(profiler, interval) profiler[interval].Stop()
#else
#define WINML_PROFILING_START(profiler, interval) do {} while(0)
#define WINML_PROFILING_STOP(profiler, interval) do {} while(0)
#endif
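
// Usage sketch: define an interval enum that ends with COUNT, instantiate
// Profiler<YourEnum>, enable it, and bracket the code of interest with the
// WINML_PROFILING_START/STOP macros. MY_PERF and DoWork() below are
// hypothetical placeholders rather than part of this project.
//
//   enum MY_PERF { WORK = 0, COUNT };
//   Profiler<MY_PERF> profiler;
//   profiler.Enable();
//   WINML_PROFILING_START(profiler, MY_PERF::WORK);
//   DoWork();                                              // code being profiled (placeholder)
//   WINML_PROFILING_STOP(profiler, MY_PERF::WORK);
//   double avgTime = profiler[MY_PERF::WORK].GetAverage(CounterType::TIMER);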
@@ -0,0 +1,31 @@ WinMLModelRunner.sln

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.27004.2005
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WinMLModelRunner", "WinMLModelRunner.vcxproj", "{81EA9CC6-8A26-4583-B1A4-84740EF815C8}"
EndProject
Global
    GlobalSection(SolutionConfigurationPlatforms) = preSolution
        Debug|x64 = Debug|x64
        Debug|x86 = Debug|x86
        Release|x64 = Release|x64
        Release|x86 = Release|x86
    EndGlobalSection
    GlobalSection(ProjectConfigurationPlatforms) = postSolution
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x64.ActiveCfg = Debug|x64
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x64.Build.0 = Debug|x64
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x86.ActiveCfg = Debug|Win32
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x86.Build.0 = Debug|Win32
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x64.ActiveCfg = Release|x64
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x64.Build.0 = Release|x64
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x86.ActiveCfg = Release|Win32
        {81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x86.Build.0 = Release|Win32
    EndGlobalSection
    GlobalSection(SolutionProperties) = preSolution
        HideSolutionNode = FALSE
    EndGlobalSection
    GlobalSection(ExtensibilityGlobals) = postSolution
        SolutionGuid = {D193B2D4-1FF5-4E14-9334-E5EF4C8F9069}
    EndGlobalSection
EndGlobal
@@ -0,0 +1,151 @@ WinMLModelRunner.vcxproj
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="CommandLineArgs.cpp" />
    <ClCompile Include="Main.cpp" />
    <ClCompile Include="Stopwatch.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="BindingUtilities.h" />
    <ClInclude Include="CommandLineArgs.h" />
    <ClInclude Include="Common.h" />
    <ClInclude Include="OutputHelper.h" />
    <ClInclude Include="ModelBinding.h" />
    <ClInclude Include="Stopwatch.h" />
    <ClInclude Include="TimerHelper.h" />
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <CppWinRTEnabled>true</CppWinRTEnabled>
    <VCProjectVersion>15.0</VCProjectVersion>
    <ProjectGuid>{81EA9CC6-8A26-4583-B1A4-84740EF815C8}</ProjectGuid>
    <RootNamespace>Benchmark</RootNamespace>
    <WindowsTargetPlatformVersion>10.0.17713.0</WindowsTargetPlatformVersion>
    <ProjectName>WinMLModelRunner</ProjectName>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <IntDir>$(Platform)\$(Configuration)\$(Benchmark)\</IntDir>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>Disabled</Optimization>
      <SDLCheck>true</SDLCheck>
      <LanguageStandard>stdcpp17</LanguageStandard>
      <ShowIncludes>true</ShowIncludes>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <AdditionalDependencies>dxgi.lib;d3d12.lib;winml.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>Disabled</Optimization>
      <SDLCheck>true</SDLCheck>
      <LanguageStandard>stdcpp17</LanguageStandard>
      <ConformanceMode>true</ConformanceMode>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <AdditionalDependencies>dxgi.lib;d3d12.lib;windowsapp.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <LanguageStandard>stdcpp17</LanguageStandard>
      <ShowIncludes>true</ShowIncludes>
    </ClCompile>
    <Link>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <SubSystem>Console</SubSystem>
      <AdditionalDependencies>dxgi.lib;d3d12.lib;winml.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <LanguageStandard>stdcpp17</LanguageStandard>
      <ShowIncludes>true</ShowIncludes>
    </ClCompile>
    <Link>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <SubSystem>Console</SubSystem>
      <AdditionalDependencies>dxgi.lib;d3d12.lib;winml.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>
@@ -0,0 +1,33 @@
The WinMLModelRunner program can run .onnx or .pb models where the input and output variables
are tensors. It lets you run WinML on the GPU or the CPU; if neither is specified, it runs the
test on both so that separate GPU and CPU performance measurements are generated. The GPU, CPU,
and wall-clock times for loading, binding, and evaluating, as well as the CPU and GPU memory
usage during evaluation, are printed to the command line and to a CSV file.

If no CSV file name is specified, the program creates a CSV file titled
"WinML Model Run [Today's date].csv" in the same folder as the .exe file.

Command-line options:
---------------------------------------------------------------------------------------
Required command-line arguments (use one of the following):
-model <path>     : Path to a .onnx or .pb model file.

-folder <path>    : Path to a folder of .onnx or .pb models; all models in the folder are run.

Optional command-line arguments:
-iterations <int> : Number of times to evaluate the model.
-CPU              : Create a session on the CPU.
-GPU              : Create a session on the GPU.
-csv <file name>  : Create a CSV file and output the performance measurements to it.

Examples:
---------------------------------------------------------------------------------------
Run the 'concat' operator on the CPU and GPU separately 5 times:
> WinMLModelRunner.exe -model c:\\data\\concat.onnx -iterations 5

Run all the models in the data folder 3 times using only the CPU:
> WinMLModelRunner.exe -folder c:\\data -iterations 3 -CPU

Run all of the models in the data folder on the GPU and CPU once and output the
performance data to benchmarkdata.csv:
> WinMLModelRunner.exe -folder c:\\data -csv benchmarkdata.csv