This commit is contained in:
Paul McDaniel 2018-07-27 14:50:46 -07:00
Родитель 1ef6ae729d
Коммит 9fd8281724
13 изменённых файлов: 1607 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,109 @@
#pragma once
#include "Common.h"
#include "ModelBinding.h"
using namespace winrt::Windows::AI::MachineLearning;
namespace BindingUtilities
{
void BindTensorsFromGarbageData(LearningModelBinding context, LearningModel model) {
for (auto&& description : model.InputFeatures())
{
if (description == nullptr)
{
ThrowFailure(L" Learning model has no binding description.");
}
hstring name = description.Name();
TensorFeatureDescriptor tensorDescriptor = description.as<TensorFeatureDescriptor>();
TensorKind tensorKind = tensorDescriptor.TensorKind();
switch (tensorKind) {
case TensorKind::Undefined:
{
ThrowFailure(L" TensorKind is undefined.");
}
case TensorKind::Float:
{
ModelBinding<float> binding(description);
ITensor tensor = TensorFloat::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Double:
{
ModelBinding<double> binding(description);
ITensor tensor = TensorDouble::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int8:
{
ModelBinding<uint8_t> binding(description);
ITensor tensor = TensorInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt8:
{
ModelBinding<uint8_t> binding(description);
ITensor tensor = TensorUInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int16:
{
ModelBinding<int16_t> binding(description);
ITensor tensor = TensorInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt16:
{
ModelBinding<uint16_t> binding(description);
ITensor tensor = TensorUInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int32:
{
ModelBinding<int32_t> binding(description);
ITensor tensor = TensorInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt32:
{
ModelBinding<uint32_t> binding(description);
ITensor tensor = TensorUInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int64:
{
ModelBinding<int64_t> binding(description);
ITensor tensor = TensorInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt64:
{
ModelBinding<uint64_t> binding(description);
ITensor tensor = TensorUInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
default:
{
ThrowFailure(L"TensorKind has not been implemented.");
break;
}
}
}
}
// Clears any previous bindings on the context and rebinds every model input
// with garbage data. Declared inline because this definition lives in a
// header and may be included by multiple translation units.
inline void BindGarbageDataToContext(LearningModelBinding context, LearningModel model) {
    context.Clear();
    BindTensorsFromGarbageData(context, model);
}
};

Просмотреть файл

@ -0,0 +1,42 @@
#include <Windows.h>
#include <string>
#include "CommandLineArgs.h"
// Parses the process command line.
// Recognized flags: -CPU, -GPU, -iterations <n>, -model <path>,
// -folder <path>, -disableMetacommands, -csv <file>.
CommandLineArgs::CommandLineArgs()
{
    int numArgs = 0;
    LPWSTR* args = CommandLineToArgvW(GetCommandLineW(), &numArgs);
    if (args == nullptr)
    {
        // Parsing failed; behave as if no arguments were given
        // (m_useCPU == m_useGPU == false => run on both devices).
        m_useCPUandGPU = true;
        return;
    }
    for (int i = 0; i < numArgs; i++)
    {
        // Device-selection flags.
        if (_wcsicmp(args[i], L"-CPU") == 0)
        {
            m_useCPU = true;
        }
        else if (_wcsicmp(args[i], L"-GPU") == 0)
        {
            m_useGPU = true;
        }
        // Options below consume the following token; every one guards
        // i + 1 < numArgs so a trailing option without a value cannot read
        // past the end of the argument array (the original -csv did not).
        if ((_wcsicmp(args[i], L"-iterations") == 0) && (i + 1 < numArgs))
        {
            m_numIterations = static_cast<UINT>(_wtoi(args[++i]));
        }
        else if ((_wcsicmp(args[i], L"-model") == 0) && (i + 1 < numArgs))
        {
            m_modelPath = args[++i];
        }
        else if ((_wcsicmp(args[i], L"-folder") == 0) && (i + 1 < numArgs))
        {
            m_folderPath = args[++i];
        }
        else if (_wcsicmp(args[i], L"-disableMetacommands") == 0)
        {
            m_metacommandsEnabled = false;
        }
        else if ((_wcsicmp(args[i], L"-csv") == 0) && (i + 1 < numArgs))
        {
            m_csvFileName = args[++i];
        }
    }
    // CommandLineToArgvW allocates one contiguous block the caller must free.
    LocalFree(args);
    // Run on both devices when neither or both of -CPU/-GPU were supplied.
    m_useCPUandGPU = m_useCPU == m_useGPU;
}

Просмотреть файл

@ -0,0 +1,28 @@
#pragma once
// Holds the options parsed from the process command line
// (see CommandLineArgs.cpp for the flag parsing).
class CommandLineArgs
{
public:
    CommandLineArgs();

    // Device selection: -CPU / -GPU. UseCPUandGPU() is true when neither
    // or both flags were given.
    bool UseCPU() const { return m_useCPU; }
    bool UseGPU() const { return m_useGPU; }
    bool UseCPUandGPU() const { return m_useCPUandGPU; }

    // Path of a single model file (-model) or a folder of models (-folder).
    const std::wstring& ModelPath() const { return m_modelPath; }
    // swap() moves the argument's buffer in without an extra copy.
    void SetModelPath(std::wstring path) { m_modelPath.swap(path); }
    const std::wstring& FolderPath() const { return m_folderPath; }

    // Number of timed evaluation iterations (-iterations, default 1).
    UINT NumIterations() const { return m_numIterations; }

    // CSV output file name (-csv); empty means the caller picks a default.
    // Made const and by-reference for consistency with the other getters.
    const std::wstring& CsvFileName() const { return m_csvFileName; }

    // NOTE(review): default is false even without -disableMetacommands on the
    // command line - confirm the intended default.
    bool MetacommandsEnabled() const { return m_metacommandsEnabled; }

private:
    bool m_useCPU = false;
    bool m_useGPU = false;
    bool m_useCPUandGPU = false;
    std::wstring m_folderPath;
    std::wstring m_modelPath;
    std::wstring m_csvFileName;
    UINT m_numIterations = 1;
    bool m_metacommandsEnabled = false;
};

Просмотреть файл

@ -0,0 +1,91 @@
#pragma once
#define _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS
#include <winrt/Windows.AI.MachineLearning.h>
#include <winrt/Windows.Foundation.h>
#include <winrt/Windows.Media.h>
#include <winrt/Windows.Graphics.Imaging.h>
#include <winrt/Windows.Media.h>
#include "winrt/Windows.Storage.h"
#include <winrt/Windows.Storage.Streams.h>
#include "TimerHelper.h"
#include <vector>
#include <string>
#include <iostream>
#include <atlbase.h>
#include <sstream>
#include <comdef.h>
#include <algorithm>
#include <numeric>
#include <cassert>
#include <fstream>
#include <future>
#include <thread>
#include <dxgi1_6.h>
// Profiler slot indices, one per measured phase of a model-test run.
enum WINML_MODEL_TEST_PERF
{
    ENTIRE_TEST = 0,
    LOAD_MODEL,
    CREATE_SESSION,
    BIND_VALUE,
    EVAL_MODEL,
    EVAL_MODEL_FIRST_RUN,
    COUNT // number of phases; not a phase itself
};
// Display names aligned by index with WINML_MODEL_TEST_PERF.
// NOTE(review): only 5 names for 6 phases - EVAL_MODEL_FIRST_RUN has no
// entry; confirm before indexing beyond EVAL_MODEL.
// NOTE(review): `static` in a header gives each translation unit its own
// mutable copy of this vector.
static std::vector<std::wstring> WINML_MODEL_TEST_PERF_NAMES =
{
    L"ENTIRE TEST ",
    L" LOAD MODEL ",
    L" CREATE SESSION ",
    L" BIND VALUE ",
    L" EVAL MODEL ",
};
// Upper bound on profiling iterations (usage not visible in this file).
#define MAX_PROFILING_LOOP 100
using namespace winrt;
// Formats an HRESULT as "0x<hex code>: <system error message>".
inline std::wstring MakeErrorMsg(HRESULT hr)
{
    _com_error comError(hr);
    std::wostringstream stream;
    stream << L"0x" << std::hex << hr << ": " << comError.ErrorMessage();
    return stream.str();
}
// Formats "<errorMsg> (<formatted HRESULT>)".
inline std::wstring MakeErrorMsg(HRESULT hr, const std::wstring &errorMsg)
{
    std::wostringstream stream;
    stream << errorMsg << L" (" << MakeErrorMsg(hr) << L")";
    return stream.str();
}
// Writes "ERROR: <errorMsg>" to both the debugger output and stdout.
inline void WriteErrorMsg(const std::wstring &errorMsg)
{
    std::wostringstream stream;
    stream << L"ERROR: " << errorMsg << std::endl;
    const std::wstring formatted = stream.str();
    OutputDebugStringW(formatted.c_str());
    std::wcout << formatted << std::endl;
}
// Writes "ERROR: <errorMsg> (<formatted HRESULT>)" to the debugger and stdout.
// Delegates to MakeErrorMsg(hr, errorMsg), which builds exactly the string
// this function previously duplicated inline.
inline void WriteErrorMsg(HRESULT hr, const std::wstring &errorMsg = L"")
{
    WriteErrorMsg(MakeErrorMsg(hr, errorMsg));
}
// Throws the formatted message (as std::wstring) when hr is a failure code;
// success codes pass through untouched.
inline void ThrowIfFailed(HRESULT hr, const std::wstring &errorMsg = L"")
{
    if (SUCCEEDED(hr))
    {
        return;
    }
    throw MakeErrorMsg(hr, errorMsg);
}
// Unconditionally reports a failure by throwing the message itself; callers
// in this tool catch `const std::wstring&` and print it via WriteErrorMsg.
inline void ThrowFailure(const std::wstring &errorMsg)
{
    throw std::wstring(errorMsg);
}

Просмотреть файл

@ -0,0 +1,153 @@
#include "Common.h"
#include "OutputHelper.h"
#include "ModelBinding.h"
#include "BindingUtilities.h"
#include "Stopwatch.h"
#include "CommandLineArgs.h"
#include <filesystem>
// Evaluates `expr`; on failure logs the HRESULT and returns 1 from the
// enclosing function. NOTE(review): not used in the code visible here.
#define CheckHr(expr, errorMsg) hr = (expr); if (FAILED(hr)) { WriteErrorMsg(hr, errorMsg); return 1; }
// Global profiler shared by all evaluation runs; reset after each run.
Profiler<WINML_MODEL_TEST_PERF> g_Profiler;
// Number of untimed warm-up ("garbage") evaluations before the timed loop.
int g_GarbageRuns = 10;
// Loads, binds, and evaluates the user-specified model and outputs the GPU/CPU and
// wall-clock times(in ms) for each step to the command line.
// NOTE(review): the model is loaded from args.ModelPath(); modelName is used
// only for display. On load/bind failure the error is printed and the run is
// abandoned without touching the evaluate timings.
void EvaluateModel(CommandLineArgs args, std::wstring modelName, OutputHelper * output, LearningModelDeviceKind deviceKind)
{
    Stopwatch timer;
    output->PrintModelInfo(modelName, deviceKind);
    // --- Load: profiled and wall-clock timed ---
    WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
    timer.Click();
    LearningModel model = nullptr;
    try
    {
        model = LearningModel::LoadFromFilePath(args.ModelPath());
    }
    catch (const std::wstring &msg)
    {
        // ThrowFailure/ThrowIfFailed raise std::wstring messages.
        WriteErrorMsg(msg);
        return;
    }
    WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
    timer.Click();
    output->m_clockLoadTime = timer.GetElapsedMilliseconds();
    // Session/binding creation is not separately timed here
    // (CREATE_SESSION profiler slot is unused in this function).
    LearningModelSession session(model, LearningModelDevice(deviceKind));
    LearningModelBinding binding(session);
    // Initialize model input and bind garbage data.
    WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
    timer.Click();
    try
    {
        BindingUtilities::BindGarbageDataToContext(binding, model);
    }
    catch (const std::wstring &msg)
    {
        WriteErrorMsg(msg);
        return;
    }
    timer.Click();
    WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
    output->m_clockBindTime = timer.GetElapsedMilliseconds();
    // Untimed warm-up evaluations.
    for (int i = 0; i < g_GarbageRuns; i++) {
        auto result = session.Evaluate(binding, L"");
    }
    // Timed evaluations; per-iteration wall-clock time is recorded.
    for (UINT i = 0; i < args.NumIterations(); i++)
    {
        WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
        timer.Click();
        auto result = session.Evaluate(binding, L"");
        timer.Click();
        WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
        output->m_clockEvalTimes.push_back(timer.GetElapsedMilliseconds());
    }
    // Report results, then clear the profiler for the next run.
    output->PrintWallClockTimes(args.NumIterations());
    if (deviceKind == LearningModelDeviceKind::DirectX)
    {
        output->PrintGPUTimes(g_Profiler, args.NumIterations());
    }
    else
    {
        output->PrintCPUTimes(g_Profiler, args.NumIterations());
    }
    g_Profiler.Reset();
}
// Evaluates every .onnx / .pb model found directly in args.FolderPath(),
// writing one CSV row per model. Errors from one model are reported and the
// remaining models still run.
void EvaluateModelsInDirectory(CommandLineArgs args, OutputHelper * output)
{
    for (auto & it : std::filesystem::directory_iterator(args.FolderPath()))
    {
        // Match the actual file extension instead of substring-searching the
        // whole path (which also matched e.g. "C:\old.onnx.bak\model.txt").
        const std::wstring extension = it.path().extension().wstring();
        if (extension == L".onnx" || extension == L".pb")
        {
            // path().wstring() performs the proper native wide conversion;
            // the previous byte-by-byte widening corrupted non-ASCII paths.
            std::wstring fileName = it.path().wstring();
            args.SetModelPath(fileName);
            try
            {
                if (args.UseCPUandGPU() || args.UseGPU())
                {
                    EvaluateModel(args, args.ModelPath(), output, LearningModelDeviceKind::DirectX);
                }
                if (args.UseCPUandGPU() || args.UseCPU())
                {
                    EvaluateModel(args, args.ModelPath(), output, LearningModelDeviceKind::Cpu);
                }
                output->WritePerformanceDataToCSV(g_Profiler, args, fileName);
                output->Reset();
            }
            catch (const std::wstring &msg)
            {
                WriteErrorMsg(msg);
                continue;
            }
        }
    }
}
// Entry point. Parses the command line, runs the requested model (or every
// model in a folder) on the requested device(s), and writes results to CSV.
int main(int argc, char** argv)
{
    CommandLineArgs args;
    OutputHelper output;
    // WinRT must be initialized before any Windows.AI.MachineLearning call.
    winrt::init_apartment();
    output.PrintHardwareInfo();
    g_Profiler.Enable();
    // CSV target: explicit -csv name, otherwise a date-stamped default.
    std::wstring csvFileName = args.CsvFileName();
    if (csvFileName.empty())
    {
        output.SetDefaultCSVFileName();
    }
    else
    {
        output.m_csvFileName = csvFileName;
    }
    // -model takes precedence over -folder; with neither, nothing runs.
    if (!args.ModelPath().empty())
    {
        if (args.UseCPUandGPU() || args.UseGPU())
        {
            EvaluateModel(args, args.ModelPath(), &output, LearningModelDeviceKind::DirectX);
        }
        if (args.UseCPUandGPU() || args.UseCPU())
        {
            EvaluateModel(args, args.ModelPath(), &output, LearningModelDeviceKind::Cpu);
        }
        output.WritePerformanceDataToCSV(g_Profiler, args, args.ModelPath());
        output.Reset();
    }
    else if (!args.FolderPath().empty())
    {
        EvaluateModelsInDirectory(args, &output);
    }
    return 0;
}

Просмотреть файл

@ -0,0 +1,95 @@
#pragma once
#include "Common.h"
// Data storage for a model input or output variable.
template< typename T>
class ModelBinding
{
public:
ModelBinding(winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor variableDesc) : m_bindingDesc(variableDesc)
{
UINT numElements = 0;
if (variableDesc.Kind() == LearningModelFeatureKind::Tensor)
{
InitTensorBinding(variableDesc, numElements);
}
else
{
ThrowFailure(L"ModelBinding: Binding feature type not implemented");
}
}
winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor GetDesc()
{
return m_bindingDesc;
}
UINT GetNumElements() const
{
return m_numElements;
}
UINT GetElementSize() const
{
return m_elementSize;
}
std::vector<INT64> GetShapeBuffer()
{
return m_shapeBuffer;
}
void* GetData()
{
return m_dataBuffer.data();
}
std::vector<T> GetDataBuffer()
{
return m_dataBuffer;
}
size_t GetDataBufferSize()
{
return m_dataBuffer.size();
}
private:
void InitNumElementsAndShape(winrt::Windows::Foundation::Collections::IVectorView<int64_t> * shape, UINT numDimensions, UINT numElements)
{
int unknownDim = -1;
UINT numKnownElements = 1;
for (UINT dim = 0; dim < numDimensions; dim++)
{
INT64 dimSize = shape->GetAt(dim);
if (dimSize <= 0)
{
if (unknownDim == -1)
{
dimSize = 1;
}
}
else
{
numKnownElements *= static_cast<UINT>(dimSize);
}
m_shapeBuffer.push_back(dimSize);
}
m_numElements = numKnownElements;
}
void InitTensorBinding(winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor descriptor, UINT numElements)
{
auto tensorDescriptor = descriptor.as<winrt::Windows::AI::MachineLearning::TensorFeatureDescriptor>();
InitNumElementsAndShape(&tensorDescriptor.Shape(), tensorDescriptor.Shape().Size(), 1);
m_dataBuffer.resize(m_numElements);
}
winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor m_bindingDesc;
std::vector<INT64> m_shapeBuffer;
UINT m_numElements = 0;
std::vector<T> m_dataBuffer;
};

Просмотреть файл

@ -0,0 +1,221 @@
#pragma once
#include "Common.h"
#include <time.h>
#include "CommandLineArgs.h"
#include <fstream>
#include <ctime>
#include <locale>
#include <utility>
#include <codecvt>
#include <iomanip>
#include <windows.h>
#include <stdio.h>
using namespace winrt::Windows::AI::MachineLearning;
// Stores performance information and handles output to the command line and CSV files.
// Wall-clock members are filled in by EvaluateModel; CPU/GPU averages are
// pulled from the profiler by the Print* methods and later flushed to CSV.
class OutputHelper
{
public:
    OutputHelper() {}
    // Averages the recorded per-iteration wall-clock evaluate times and
    // prints the load/bind/evaluate breakdown.
    void PrintWallClockTimes(UINT iterations)
    {
        double totalEvalTime = std::accumulate(m_clockEvalTimes.begin(), m_clockEvalTimes.end(), 0.0);
        m_clockEvalTime = totalEvalTime / (double)iterations;
        std::cout << "Wall-clock Time Averages (iterations = " << iterations << "):" << std::endl;
        std::cout << " Load: " << m_clockLoadTime << " ms" << std::endl;
        std::cout << " Bind: " << m_clockBindTime << " ms" << std::endl;
        std::cout << " Evaluate: " << m_clockEvalTime << " ms" << std::endl;
        std::cout << " Total time: " << m_clockLoadTime + m_clockBindTime + m_clockEvalTime << " ms" << std::endl;
        std::cout << std::endl;
    }
    // Reads CPU-run averages out of the profiler and prints them.
    // NOTE(review): the "memory usage" figure is the CPU_USAGE counter's
    // average printed with an "mb" label - confirm the counter's unit.
    void PrintCPUTimes(Profiler<WINML_MODEL_TEST_PERF> &profiler, UINT iterations)
    {
        m_CPULoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
        m_CPUBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
        m_CPUEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
        m_CPUEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::CPU_USAGE);
        std::cout << "CPU Time Averages (iterations = " << iterations << "):" << std::endl;
        std::cout << " Load: " << m_CPULoadTime << " ms" << std::endl;
        std::cout << " Bind: " << m_CPUBindTime << " ms" << std::endl;
        std::cout << " Evaluate: " << m_CPUEvalTime << " ms" << std::endl;
        std::cout << " Total time: " << m_CPULoadTime + m_CPUBindTime + m_CPUEvalTime << " ms" << std::endl;
        std::cout << " Evaluate memory usage: " << m_CPUEvalMemoryUsage << " mb" << std::endl;
        std::cout << std::endl;
    }
    // Same breakdown for a DirectX (GPU) run.
    // NOTE(review): these are the same host-side profiler counters
    // (TIMER / CPU_USAGE) recorded during a GPU run, not GPU hardware
    // counters - confirm the labeling is intended.
    void PrintGPUTimes(Profiler<WINML_MODEL_TEST_PERF> &profiler, UINT iterations)
    {
        m_GPULoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
        m_GPUBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
        m_GPUEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
        m_GPUEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::CPU_USAGE);
        std::cout << "GPU Time Averages (iterations = " << iterations << "):" << std::endl;
        std::cout << " Load: " << m_GPULoadTime << " ms" << std::endl;
        std::cout << " Bind: " << m_GPUBindTime << " ms" << std::endl;
        std::cout << " Evaluate: " << m_GPUEvalTime << " ms" << std::endl;
        std::cout << " Total time: " << m_GPULoadTime + m_GPUBindTime + m_GPUEvalTime << " ms" << std::endl;
        std::cout << " Evaluate memory usage: " << m_GPUEvalMemoryUsage << " mb" << std::endl;
        std::cout << std::endl;
    }
    // Prints "<model> [CPU|GPU]" banner for the upcoming run.
    void PrintModelInfo(std::wstring modelName, LearningModelDeviceKind deviceKind)
    {
        std::wstring device = deviceKind == LearningModelDeviceKind::Cpu ? L" [CPU]" : L" [GPU]";
        std::wcout << modelName << device << std::endl;
        std::cout << "=================================================================" << std::endl;
        std::cout << std::endl;
    }
    // Prints the tool banner and the description of GPU adapter 0.
    // NOTE(review): the CreateDXGIFactory1 and EnumAdapters HRESULTs are
    // ignored; only GetDesc is checked.
    void PrintHardwareInfo()
    {
        std::cout << "WinML Model Runner" << std::endl;
        com_ptr<IDXGIFactory6> factory;
        (CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void()));
        com_ptr<IDXGIAdapter> adapter;
        factory->EnumAdapters(0, adapter.put());
        DXGI_ADAPTER_DESC description;
        if (SUCCEEDED(adapter->GetDesc(&description)))
        {
            std::wcout << L"GPU: " << description.Description << std::endl;
            std::cout << std::endl;
        }
    }
    // Builds "WinML Model Run [YYYY-MM-DD].csv" from the local date.
    // NOTE(review): std::wstring_convert/codecvt are deprecated in C++17
    // (silenced via _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS in Common.h).
    void SetDefaultCSVFileName()
    {
        auto time = std::time(nullptr);
        struct tm localTime;
        localtime_s(&localTime, &time);
        std::ostringstream oss;
        oss << std::put_time(&localTime, "%Y-%m-%d");
        std::string fileName = "WinML Model Run [" + oss.str() + "].csv";
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
        m_csvFileName = converter.from_bytes(fileName);
    }
    // Appends one CSV row for `model`; writes the header first when the file
    // is empty or missing. Which column groups appear depends on the
    // CPU/GPU selection in `args`.
    void WritePerformanceDataToCSV(Profiler<WINML_MODEL_TEST_PERF> &g_Profiler, CommandLineArgs args, std::wstring model)
    {
        if (m_csvFileName.length() > 0)
        {
            // Check if header exists
            // (reading the first character; EOF means empty/new file).
            bool bNewFile = false;
            std::ifstream fin;
            fin.open(m_csvFileName);
            std::filebuf* outbuf = fin.rdbuf();
            if (EOF == outbuf->sbumpc())
            {
                bNewFile = true;
            }
            fin.close();
            std::ofstream fout;
            fout.open(m_csvFileName, std::ios_base::app);
            std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
            std::string modelName = converter.to_bytes(model);
            if (bNewFile)
            {
                fout << "Model Name" << ","
                    << "Iterations" << ",";
                if (args.UseCPUandGPU() || args.UseCPU())
                {
                    fout << "CPU Load (ms)" << ","
                        << "CPU Bind (ms)" << ","
                        << "CPU Evaluate (ms)" << ","
                        << "CPU total time (ms)" << ","
                        << "CPU Usage (Evaluate) (mb)" << ",";
                }
                if (args.UseCPUandGPU() || args.UseGPU())
                {
                    fout << "GPU Load (ms)" << ","
                        << "GPU Bind (ms)" << ","
                        << "GPU Evaluate (ms)" << ","
                        << "GPU total time (ms)" << ","
                        << "GPU Usage (Evaluate) (mb)" << ",";
                }
                fout << "Wall-clock Load (ms)" << ","
                    << "Wall-clock Bind (ms)" << ","
                    << "Wall-clock Evaluate (ms)" << ","
                    << "Wall-clock total time (ms)" << ","
                    << std::endl;
            }
            fout << modelName << "," << args.NumIterations() << ",";
            if (args.UseCPUandGPU() || args.UseCPU())
            {
                fout << m_CPULoadTime << ","
                    << m_CPUBindTime << ","
                    << m_CPUEvalTime << ","
                    << m_CPULoadTime + m_CPUBindTime + m_CPUEvalTime << ","
                    << m_CPUEvalMemoryUsage << ",";
            }
            if (args.UseCPUandGPU() || args.UseGPU())
            {
                fout << m_GPULoadTime << ","
                    << m_GPUBindTime << ","
                    << m_GPUEvalTime << ","
                    << m_GPULoadTime + m_GPUBindTime + m_GPUEvalTime << ","
                    << m_GPUEvalMemoryUsage << ",";
            }
            fout << m_clockLoadTime << ","
                << m_clockBindTime << ","
                << m_clockEvalTime << ","
                << m_clockLoadTime + m_clockBindTime + m_clockEvalTime << ","
                << std::endl;
            fout.close();
        }
    }
    // Zeroes the per-model aggregates between models.
    // NOTE(review): m_clockEvalTimes is NOT cleared here - the vector keeps
    // growing across models; confirm whether that is intended.
    void Reset()
    {
        m_GPULoadTime = 0;
        m_GPUBindTime = 0;
        m_GPUEvalTime = 0;
        m_GPUEvalMemoryUsage = 0;
        m_CPULoadTime = 0;
        m_CPUBindTime = 0;
        m_CPUEvalTime = 0;
        m_CPUEvalMemoryUsage = 0;
        m_clockLoadTime = 0;
        m_clockBindTime = 0;
        m_clockEvalTime = 0;
    }
    // Written directly by EvaluateModel.
    double m_clockLoadTime = 0;
    double m_clockBindTime = 0;
    std::vector<double> m_clockEvalTimes;
    // Destination CSV file; empty disables CSV output.
    std::wstring m_csvFileName;
private:
    double m_GPULoadTime = 0;
    double m_GPUBindTime = 0;
    double m_GPUEvalTime = 0;
    double m_GPUEvalMemoryUsage = 0;
    double m_CPULoadTime = 0;
    double m_CPUBindTime = 0;
    double m_CPUEvalTime = 0;
    double m_CPUEvalMemoryUsage = 0;
    double m_clockEvalTime = 0;
};

Просмотреть файл

@ -0,0 +1,16 @@
#include <Windows.h>
#include "Stopwatch.h"
// Caches the performance-counter frequency and establishes a baseline tick so
// the first Click() measures from construction instead of reading an
// uninitialized m_lastClickTime.
Stopwatch::Stopwatch()
{
    QueryPerformanceFrequency(&m_frequency);
    QueryPerformanceCounter(&m_lastClickTime);
}
void Stopwatch::Click()
{
LARGE_INTEGER currentTime;
QueryPerformanceCounter(&currentTime);
auto delta = static_cast<double>(currentTime.QuadPart - m_lastClickTime.QuadPart);
m_elapsedMilliseconds = delta / (static_cast<double>(m_frequency.QuadPart) / 1000.0);
m_lastClickTime = currentTime;
}

Просмотреть файл

@ -0,0 +1,20 @@
#pragma once
// Interval timer based on QueryPerformanceCounter: each Click() records the
// time since the previous Click() (see Stopwatch.cpp).
class Stopwatch
{
public:
    Stopwatch();
    void Click();
    // Time elapsed between last two clicks.
    inline double GetElapsedMilliseconds() const
    {
        return m_elapsedMilliseconds;
    }
private:
    // Brace-initialized so a Click() before any baseline is established reads
    // zeroed memory instead of indeterminate values.
    LARGE_INTEGER m_lastClickTime{};
    LARGE_INTEGER m_frequency{};
    double m_elapsedMilliseconds = 0.0;
};

Просмотреть файл

@ -0,0 +1,617 @@
#pragma once
#include <windows.h>
#include <cmath>
#ifndef DISABLE_GPU_COUNTERS
#include <Pdh.h>
#include <PdhMsg.h>
#endif
#include <psapi.h>
#include <string>
#include <vector>
#define TIMER_SLOT_SIZE (128)
#define CONVERT_100NS_TO_SECOND(x) ((x) * 0.0000001)
#define BYTE_TO_MB(x) ((x)/(1024.0*1024.0))
// A stopwatch to measure the time passed (in seconds) between current Stop call and the closest Start call that has been called before.
class Timer
{
public:
    // Records the current QueryPerformanceCounter tick as the start point.
    void Start()
    {
        LARGE_INTEGER t;
        QueryPerformanceCounter(&t);
        m_startTime = static_cast<double>(t.QuadPart);
    }
    // Returns seconds elapsed since the most recent Start().
    double Stop()
    {
        LARGE_INTEGER stopTime;
        QueryPerformanceCounter(&stopTime);
        double t = static_cast<double>(stopTime.QuadPart) - m_startTime;
        LARGE_INTEGER tps;
        QueryPerformanceFrequency(&tps);
        return t / static_cast<double>(tps.QuadPart);
    }
private:
    // Initialized so a Stop() without a prior Start() yields a defined (if
    // meaningless) value instead of reading an uninitialized double.
    double m_startTime = 0.0;
};
// Measures per-process CPU time (normalized per processor) and memory-usage
// deltas between a Start()/Stop() pair, via GetProcessTimes and
// GetProcessMemoryInfo.
class CpuPerfCounter
{
public:
    CpuPerfCounter()
    {
        Reset();
    }
    ~CpuPerfCounter() {}
    // Re-captures process/system information and zeroes all start and delta
    // values. Until a successful Start(), Stop() is a no-op.
    void Reset()
    {
        SYSTEM_INFO sysInfo = { 0 };
        GetSystemInfo(&sysInfo);
        m_startKernelTime = { 0 };
        m_startUserTime = { 0 };
        m_numProcessors = sysInfo.dwNumberOfProcessors;
        m_procHandle = GetCurrentProcess();;
        m_pid = GetCurrentProcessId();;
        m_previousStartCallFailed = true;
        m_processTime = 0;
        m_startPageFaultCount = 0;
        m_startPagefileUsage = 0;
        m_startPeakPagefileUsage = 0;
        m_startWorkingSetSize = 0;
        m_startPeakWorkingSetSize = 0;
        m_deltaPageFaultCount = 0;
        m_deltaPagefileUsage = 0;
        m_deltaPeakPagefileUsage = 0;
        m_deltaWorkingSetSize = 0;
        m_deltaPeakWorkingSetSize = 0;
    }
    // Snapshots kernel/user times and memory counters. On any failure the
    // matching Stop() leaves all deltas untouched.
    void Start()
    {
        FILETIME ftIgnore, ftKernel, ftUser;
        if (!GetProcessTimes(m_procHandle, &ftIgnore, &ftIgnore, &ftKernel, &ftUser) ||
            !GetProcessMemoryCounters(m_pid, m_startPageFaultCount, m_startPagefileUsage, m_startPeakPagefileUsage, m_startWorkingSetSize, m_startPeakWorkingSetSize))
        {
            m_previousStartCallFailed = true;
        }
        else
        {
            // Copy FILETIME into ULARGE_INTEGER for 64-bit arithmetic.
            memcpy(&m_startKernelTime, &ftKernel, sizeof(FILETIME));
            memcpy(&m_startUserTime, &ftUser, sizeof(FILETIME));
            m_previousStartCallFailed = false;
        }
    }
    // Computes process CPU seconds (kernel + user, divided by the processor
    // count) and memory deltas since the matching Start().
    void Stop()
    {
        FILETIME ftIgnore, ftKernel, ftUser;
        ULARGE_INTEGER stopKernelTime, stopUserTime;
        ULONG stopPageFaultCount = 0;
        SIZE_T stopPagefileUsage = 0;
        SIZE_T stopPeakPagefileUsage = 0;
        SIZE_T stopWorkingSetSize = 0;
        SIZE_T stopPeakWorkingSetSize = 0;
        if (m_previousStartCallFailed ||
            m_numProcessors == 0 ||
            !GetProcessTimes(m_procHandle, &ftIgnore, &ftIgnore, &ftKernel, &ftUser) ||
            !GetProcessMemoryCounters(m_pid, stopPageFaultCount, stopPagefileUsage, stopPeakPagefileUsage, stopWorkingSetSize, stopPeakWorkingSetSize))
        {
            return;
        }
        memcpy(&stopKernelTime, &ftKernel, sizeof(FILETIME));
        memcpy(&stopUserTime, &ftUser, sizeof(FILETIME));
        // GetProcessTimes reports 100 ns units; convert to seconds.
        m_processTime = CONVERT_100NS_TO_SECOND((stopKernelTime.QuadPart - m_startKernelTime.QuadPart) + (stopUserTime.QuadPart - m_startUserTime.QuadPart)) / m_numProcessors;
        m_deltaPageFaultCount = stopPageFaultCount - m_startPageFaultCount;
        // Memory deltas are converted from bytes to megabytes (signed; may be
        // negative if usage shrank during the interval).
        m_deltaPagefileUsage = (double)BYTE_TO_MB((double)stopPagefileUsage - (double)m_startPagefileUsage);
        m_deltaPeakPagefileUsage = (double)BYTE_TO_MB((double)stopPeakPagefileUsage - (double)m_startPeakPagefileUsage);
        m_deltaWorkingSetSize = (double)BYTE_TO_MB((double)stopWorkingSetSize - (double)m_startWorkingSetSize);
        m_deltaPeakWorkingSetSize = (double)BYTE_TO_MB((double)stopPeakWorkingSetSize - (double)m_startPeakWorkingSetSize);
    }
    double GetProcessTime() { return m_processTime; }
    ULONG GetDeltaPageFaultCount() { return m_deltaPageFaultCount; }
    double GetDeltaPageFileUsage() { return m_deltaPagefileUsage; }
    double GetDeltaPeakPageFileUsage() { return m_deltaPeakPagefileUsage; }
    double GetDeltaWorkingSetUsage() { return m_deltaWorkingSetSize; }
    double GetDeltaPeakWorkingSetUsage() { return m_deltaPeakWorkingSetSize; }
private:
    // Reads PROCESS_MEMORY_COUNTERS for `pid`; returns false when the
    // process cannot be opened or queried (outputs untouched in that case).
    bool GetProcessMemoryCounters(DWORD pid, ULONG& pageFaultCount, SIZE_T& pageFileUsage, SIZE_T& peakPageFileUsage, SIZE_T& workingSetSize, SIZE_T& peakWorkingSetSize)
    {
        HANDLE hProcess = NULL;
        hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid);
        if (NULL == hProcess)
            return false;
        PROCESS_MEMORY_COUNTERS pmc = { 0 };
        bool result = GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc));
        if (result)
        {
            pageFaultCount = pmc.PageFaultCount;
            pageFileUsage = pmc.PagefileUsage;
            peakPageFileUsage = pmc.PeakPagefileUsage;
            workingSetSize = pmc.WorkingSetSize;
            peakWorkingSetSize = pmc.PeakWorkingSetSize;
        }
        CloseHandle(hProcess);
        return result;
    }
    ULARGE_INTEGER m_startKernelTime;
    ULARGE_INTEGER m_startUserTime;
    UINT m_numProcessors;
    HANDLE m_procHandle;
    DWORD m_pid;
    // True until a Start() succeeds; gates Stop().
    bool m_previousStartCallFailed;
    double m_processTime; // in second
    ULONG m_startPageFaultCount;
    SIZE_T m_startPagefileUsage; // in byte
    SIZE_T m_startPeakPagefileUsage; // in byte
    SIZE_T m_startWorkingSetSize; // in byte
    SIZE_T m_startPeakWorkingSetSize; // in byte
    ULONG m_deltaPageFaultCount;
    double m_deltaPagefileUsage; // in MByte
    double m_deltaPeakPagefileUsage; // in MByte
    double m_deltaWorkingSetSize; // in MByte
    double m_deltaPeakWorkingSetSize; // in MByte
};
#ifndef DISABLE_GPU_COUNTERS
class GpuPerfCounter
{
public:
GpuPerfCounter() :
m_hPDH(NULL),
m_pfnPdhOpenQuery(NULL),
m_pfnPdhAddCounter(NULL),
m_pfnPdhCollectQueryData(NULL),
m_pfnPdhGetFormattedCounterArray(NULL),
m_pfnPdhGetFormattedCounterValue(NULL),
m_pfnPdhCloseQuery(NULL),
m_query(NULL)
{
//#ifdef DISABLE_LOADLIBRARY
m_hPDH = LoadLibraryEx(L"pdh.dll", NULL, 0);
//#endif
if (m_hPDH != NULL)
{
m_pfnPdhOpenQuery = (PFNPdhOpenQuery)GetProcAddress(m_hPDH, "PdhOpenQueryW");
m_pfnPdhAddCounter = (PFNPdhAddCounter)GetProcAddress(m_hPDH, "PdhAddCounterW");
m_pfnPdhCollectQueryData = (PFNPdhCollectQueryData)GetProcAddress(m_hPDH, "PdhCollectQueryData");
m_pfnPdhGetFormattedCounterArray = (PFNPdhGetFormattedCounterArray)GetProcAddress(m_hPDH, "PdhGetFormattedCounterArrayW");
m_pfnPdhGetFormattedCounterValue = (PFNPdhGetFormattedCounterValue)GetProcAddress(m_hPDH, "PdhGetFormattedCounterValue");
m_pfnPdhCloseQuery = (PFNPdhCloseQuery)GetProcAddress(m_hPDH, "PdhCloseQuery");
}
Reset();
}
~GpuPerfCounter()
{
if (m_query)
{
CloseQuery(m_query);
m_query = NULL;
}
if (m_hPDH)
{
FreeLibrary(m_hPDH);
m_hPDH = NULL;
}
}
void Reset()
{
m_gpuUsage = 0;
m_deltaGpuDedicatedMemory = 0;
m_deltaGpuSharedMemory = 0;
// Setup PDH performance query
std::wstring pidStr = std::to_wstring(GetCurrentProcessId());
std::wstring gpuUsageQueryStr = L"\\GPU Engine(pid_*_*)\\Utilization Percentage";
std::wstring gpuDedicatedMemQueryStr = L"\\GPU Process Memory(pid_*_*)\\Dedicated Usage";
std::wstring gpuSharedMemQueryStr = L"\\GPU Process Memory(pid_*_*)\\Shared Usage";
gpuUsageQueryStr.replace(gpuUsageQueryStr.find('*'), 1, pidStr);
gpuDedicatedMemQueryStr.replace(gpuDedicatedMemQueryStr.find('*'), 1, pidStr);
gpuSharedMemQueryStr.replace(gpuSharedMemQueryStr.find('*'), 1, pidStr);
// Open query
if (m_query) CloseQuery(m_query);
m_query = NULL;
OpenQuery(NULL, NULL, &m_query);
AddCounter(m_query, gpuUsageQueryStr.c_str(), NULL, &m_gpuUsageCounter);
AddCounter(m_query, gpuDedicatedMemQueryStr.c_str(), NULL, &m_gpuDedicatedMemUsageCounter);
AddCounter(m_query, gpuSharedMemQueryStr.c_str(), NULL, &m_gpuSharedMemUsageCounter);
}
void Start()
{
PDH_FMT_COUNTERVALUE gpuDedicatedMemUsageCounterValue = {};
PDH_FMT_COUNTERVALUE gpuSharedMemUsageCounterValue = {};
PDH_STATUS status = S_OK;
// Usage rate counter require two queries. Put first one at Start() and second on at Stop()
CollectQueryData(m_query);
// Gpu dedicated ememory
status = GetFormattedCounterValue(m_gpuDedicatedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuDedicatedMemUsageCounterValue);
m_startGpuDedicatedMemory = (ERROR_SUCCESS == status) ? (double)BYTE_TO_MB(gpuDedicatedMemUsageCounterValue.largeValue) : m_startGpuDedicatedMemory;
// Gpu shared ememory
status = GetFormattedCounterValue(m_gpuSharedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuSharedMemUsageCounterValue);
m_startGpuSharedMemory = (ERROR_SUCCESS == status) ? (double)BYTE_TO_MB(gpuSharedMemUsageCounterValue.largeValue) : m_startGpuSharedMemory;
}
void Stop()
{
PDH_FMT_COUNTERVALUE_ITEM* gpuUsageCounterValue = nullptr;
PDH_FMT_COUNTERVALUE gpuDedicatedMemUsageCounterValue = {};
PDH_FMT_COUNTERVALUE gpuSharedMemUsageCounterValue = {};
DWORD bufferSize = 0;
DWORD itemCount = 0;
PDH_STATUS status = S_OK;
// Query the gpu usage.
// For different IHVs, compute shader usage could be counted as either 3D or compute engine usage.
// Here we simply pick the max usage from all types of engines to see if bottleneck is from GPU.
// The same concept has been used in task manager to display GPU usage.
status = CollectQueryData(m_query);
if (S_OK != status && PDH_NO_DATA != status)
return;
status = GetFormattedCounterArray(m_gpuUsageCounter, PDH_FMT_DOUBLE, &bufferSize, &itemCount, gpuUsageCounterValue);
if (PDH_MORE_DATA != status)
return;
gpuUsageCounterValue = (PDH_FMT_COUNTERVALUE_ITEM *)malloc(bufferSize);
if (gpuUsageCounterValue != nullptr)
{
status = GetFormattedCounterArray(m_gpuUsageCounter, PDH_FMT_DOUBLE, &bufferSize, &itemCount, gpuUsageCounterValue);
if (ERROR_SUCCESS == status)
{
double maxValue = 0;
for (size_t i = 0; i < itemCount; ++i)
{
maxValue = (gpuUsageCounterValue[i].FmtValue.doubleValue > maxValue) ? gpuUsageCounterValue[i].FmtValue.doubleValue : maxValue;
}
m_gpuUsage = maxValue;
}
}
free(gpuUsageCounterValue);
gpuUsageCounterValue = NULL;
bufferSize = 0;
itemCount = 0;
double stopGpuDedicatedMemory; // in MB
double stopGpuSharedMemory; // in MB
// Gpu dedicated ememory delta. Don't update the value if counter doesn't get values correctly.
status = GetFormattedCounterValue(m_gpuDedicatedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuDedicatedMemUsageCounterValue);
if (ERROR_SUCCESS == status)
{
stopGpuDedicatedMemory = (double)BYTE_TO_MB(gpuDedicatedMemUsageCounterValue.largeValue);
m_deltaGpuDedicatedMemory = stopGpuDedicatedMemory - m_startGpuDedicatedMemory;
}
// Gpu shared ememory. Don't update the value if counter doesn't get values correctly.
status = GetFormattedCounterValue(m_gpuSharedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuSharedMemUsageCounterValue);
if (ERROR_SUCCESS == status)
{
stopGpuSharedMemory = (double)BYTE_TO_MB(gpuSharedMemUsageCounterValue.largeValue);
m_deltaGpuSharedMemory = stopGpuSharedMemory - m_startGpuSharedMemory;
}
}
double GetGpuUsage() const { return m_gpuUsage; }
double GetDedicatedMemory() const { return m_deltaGpuDedicatedMemory; }
double GetSharedMemory() const { return m_deltaGpuSharedMemory; }
private:
// Pdh function prototypes
typedef PDH_STATUS(WINAPI *PFNPdhOpenQuery)(_In_opt_ LPCWSTR szDataSource, _In_ DWORD_PTR dwUserData, _Out_ PDH_HQUERY * phQuery);
typedef PDH_STATUS(WINAPI *PFNPdhAddCounter)(_In_ PDH_HQUERY hQuery, _In_ LPCWSTR szFullCounterPath, _In_ DWORD_PTR dwUserData, _Out_ PDH_HCOUNTER * phCounter);
typedef PDH_STATUS(WINAPI *PFNPdhCollectQueryData)(_Inout_ PDH_HQUERY hQuery);
typedef PDH_STATUS(WINAPI *PFNPdhGetFormattedCounterArray)(_In_ PDH_HCOUNTER hCounter, _In_ DWORD dwFormat, _Inout_ LPDWORD lpdwBufferSize, _Out_ LPDWORD lpdwItemCount, _Out_writes_bytes_opt_(*lpdwBufferSize) PPDH_FMT_COUNTERVALUE_ITEM_W ItemBuffer);
typedef PDH_STATUS(WINAPI *PFNPdhGetFormattedCounterValue)(_In_ PDH_HCOUNTER hCounter, _In_ DWORD dwFormat, _Out_opt_ LPDWORD lpdwType, _Out_ PPDH_FMT_COUNTERVALUE pValue);
typedef PDH_STATUS(WINAPI *PFNPdhCloseQuery)(_Inout_ PDH_HQUERY hQuery);
PDH_STATUS OpenQuery(LPCWSTR szDataSource, DWORD_PTR dwUserData, PDH_HQUERY * phQuery)
{
return (m_pfnPdhOpenQuery) ? m_pfnPdhOpenQuery(szDataSource, dwUserData, phQuery) : ERROR_MOD_NOT_FOUND;
}
PDH_STATUS AddCounter(PDH_HQUERY hQuery, LPCWSTR szFullCounterPath, DWORD_PTR dwUserData, PDH_HCOUNTER * phCounter)
{
return (m_pfnPdhAddCounter) ? m_pfnPdhAddCounter(hQuery, szFullCounterPath, dwUserData, phCounter) : ERROR_MOD_NOT_FOUND;
}
PDH_STATUS CollectQueryData(PDH_HQUERY hQuery)
{
return (m_pfnPdhCollectQueryData) ? m_pfnPdhCollectQueryData(hQuery) : ERROR_MOD_NOT_FOUND;
}
PDH_STATUS GetFormattedCounterArray(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwBufferSize, LPDWORD lpdwItemCount, PPDH_FMT_COUNTERVALUE_ITEM_W ItemBuffer)
{
return (m_pfnPdhGetFormattedCounterArray) ? m_pfnPdhGetFormattedCounterArray(hCounter, dwFormat, lpdwBufferSize, lpdwItemCount, ItemBuffer) : ERROR_MOD_NOT_FOUND;
}
PDH_STATUS GetFormattedCounterValue(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwType, PPDH_FMT_COUNTERVALUE pValue)
{
return (m_pfnPdhGetFormattedCounterValue) ? m_pfnPdhGetFormattedCounterValue(hCounter, dwFormat, lpdwType, pValue) : ERROR_MOD_NOT_FOUND;
}
PDH_STATUS CloseQuery(PDH_HQUERY hQuery)
{
return (m_pfnPdhCloseQuery) ? m_pfnPdhCloseQuery(hQuery) : ERROR_MOD_NOT_FOUND;
}
// PDH Performance Query: module handle plus the dynamically resolved entry
// points used by the wrappers above (null when pdh.dll is unavailable).
HMODULE m_hPDH;
PFNPdhOpenQuery m_pfnPdhOpenQuery;
PFNPdhAddCounter m_pfnPdhAddCounter;
PFNPdhCollectQueryData m_pfnPdhCollectQueryData;
PFNPdhGetFormattedCounterArray m_pfnPdhGetFormattedCounterArray;
PFNPdhGetFormattedCounterValue m_pfnPdhGetFormattedCounterValue;
PFNPdhCloseQuery m_pfnPdhCloseQuery;
// Open PDH query and the GPU counters added to it.
HQUERY m_query;
HCOUNTER m_gpuUsageCounter;
HCOUNTER m_gpuDedicatedMemUsageCounter;
HCOUNTER m_gpuSharedMemUsageCounter;
// Process info: pid of the process whose GPU counters are queried
// (presumably the current process — confirm against the counter paths).
DWORD m_pid;
// Data: latest measured values. Memory figures are in MB; "start" is the
// value at measurement start and "delta" the change over the interval.
double m_gpuUsage;
double m_startGpuDedicatedMemory; // in MB
double m_startGpuSharedMemory; // in MB
double m_deltaGpuDedicatedMemory; // in MB
double m_deltaGpuSharedMemory; // in MB
};
#endif
// Identifies each performance metric tracked by PerfCounterStatistics.
// Values must stay contiguous from 0: they index fixed-size arrays sized by
// TYPE_COUNT, and CounterTypeName (below) must stay in the same order.
typedef enum CounterType
{
    TIMER = 0,              // wall-clock time of the measured interval
    CPU_USAGE,              // process CPU time as a percentage of wall time
    PAGE_FAULT_COUNT,       // delta page faults over the interval
    PAGE_FILE_USAGE,        // delta page-file usage
    PEAK_PAGE_FILE_USAGE,   // delta peak page-file usage
    WORKING_SET_USAGE,      // delta working-set usage
    PEAK_WORKING_SET_USAGE, // delta peak working-set usage
    GPU_USAGE,              // GPU utilization (zero when GPU counters disabled)
    GPU_DEDICATED_MEM_USAGE,
    GPU_SHARED_MEM_USAGE,
    TYPE_COUNT              // number of counter types; not a real counter
} CounterType;
// Display names for each CounterType, indexed by the enum value.
// Keep this list in the same order and of the same length as CounterType.
// Fixed inconsistencies: "PEAK WORK SET USAGE" did not match the enum's
// PEAK_WORKING_SET_USAGE, and the two GPU memory entries used underscores
// while every other entry uses spaces.
const static std::vector<std::wstring> CounterTypeName =
{
    L"TIMER",
    L"CPU USAGE",
    L"PAGE FAULT COUNT",
    L"PAGE FILE USAGE",
    L"PEAK PAGE FILE USAGE",
    L"WORKING SET USAGE",
    L"PEAK WORKING SET USAGE",
    L"GPU USAGE",
    L"GPU DEDICATED MEM USAGE",
    L"GPU SHARED MEM USAGE"
};
// A statistics helper for the Timer/CpuPerfCounter/GpuPerfCounter classes.
// It keeps the latest TIMER_SLOT_SIZE measured samples in a ring buffer.
// The statistic functions (e.g. GetVariance) assume data always starts from
// index 0 of the buffer, which holds because samples fill from index 0 and
// only wrap once the buffer is full.
//
// Fixes vs. the previous version:
//  - the constructor now initializes m_pos/m_bBufferFull/m_data, so calling
//    Enable()+Start()/Stop() without a prior Reset() no longer reads and
//    indexes with uninitialized values;
//  - Stop() zero-initializes its local counter array, so the GPU slots are
//    defined even when DISABLE_GPU_COUNTERS compiles out their assignment;
//  - GetAverage()/GetVariance() no longer divide by a zero sample count.
class PerfCounterStatistics
{
public:
    // Counters start disabled and empty; call Enable(), then bracket each
    // measured interval with Start()/Stop().
    PerfCounterStatistics() : m_pos(0), m_bBufferFull(false), m_bDisabled(true)
    {
        // Make the accessors well-defined even before the first Reset().
        for (int i = 0; i < CounterType::TYPE_COUNT; ++i)
        {
            m_data[i].Reset();
        }
    }
    void Enable()
    {
        m_bDisabled = false;
    }
    void Disable()
    {
        m_bDisabled = true;
    }
    // Clears the ring buffer and resets the underlying counters.
    // NOTE(review): deliberately a no-op while disabled (previous behavior
    // preserved) — confirm callers always Enable() before Reset().
    void Reset()
    {
        if (m_bDisabled)
            return;
        m_pos = 0;
        m_bBufferFull = false;
        m_cpuCounter.Reset();
#ifndef DISABLE_GPU_COUNTERS
        m_gpuCounter.Reset();
#endif
        for (int i = 0; i < CounterType::TYPE_COUNT; ++i)
        {
            m_data[i].Reset();
        }
    }
    // Begins one measured interval.
    void Start()
    {
        if (m_bDisabled)
            return;
        m_timer.Start();
        m_cpuCounter.Start();
#ifndef DISABLE_GPU_COUNTERS
        m_gpuCounter.Start();
#endif
    }
    // Ends one measured interval: queries every counter and folds the sample
    // into the ring buffer, updating the running total/min/max per type.
    void Stop()
    {
        if (m_bDisabled)
            return;
        // Zero-initialize so the GPU entries stay defined when
        // DISABLE_GPU_COUNTERS removes their assignments below.
        double counterValue[CounterType::TYPE_COUNT] = {};
        // Query counters
        double time = m_timer.Stop();
        m_cpuCounter.Stop();
#ifndef DISABLE_GPU_COUNTERS
        m_gpuCounter.Stop();
#endif
        // Get counter values
        counterValue[CounterType::TIMER] = time;
        counterValue[CounterType::CPU_USAGE] = 100.0 * m_cpuCounter.GetProcessTime() / time;
        counterValue[CounterType::PAGE_FAULT_COUNT] = m_cpuCounter.GetDeltaPageFaultCount();
        counterValue[CounterType::PAGE_FILE_USAGE] = m_cpuCounter.GetDeltaPageFileUsage();
        counterValue[CounterType::PEAK_PAGE_FILE_USAGE] = m_cpuCounter.GetDeltaPeakPageFileUsage();
        counterValue[CounterType::WORKING_SET_USAGE] = m_cpuCounter.GetDeltaWorkingSetUsage();
        counterValue[CounterType::PEAK_WORKING_SET_USAGE] = m_cpuCounter.GetDeltaPeakWorkingSetUsage();
#ifndef DISABLE_GPU_COUNTERS
        counterValue[CounterType::GPU_USAGE] = m_gpuCounter.GetGpuUsage();
        counterValue[CounterType::GPU_DEDICATED_MEM_USAGE] = m_gpuCounter.GetDedicatedMemory();
        counterValue[CounterType::GPU_SHARED_MEM_USAGE] = m_gpuCounter.GetSharedMemory();
#endif
        // Update data blocks: replace the sample previously stored at m_pos
        // in the running total so the total always covers the buffer content.
        for (int i = 0; i < CounterType::TYPE_COUNT; ++i)
        {
            m_data[i].total = m_data[i].total - m_data[i].measured[m_pos] + counterValue[i];
            m_data[i].measured[m_pos] = counterValue[i];
            m_data[i].max = (counterValue[i] > m_data[i].max) ? counterValue[i] : m_data[i].max;
            m_data[i].min = (counterValue[i] < m_data[i].min) ? counterValue[i] : m_data[i].min;
        }
        // Advance the ring-buffer index, wrapping when the buffer fills.
        if (m_pos + 1 >= TIMER_SLOT_SIZE)
        {
            m_pos = 0;
            m_bBufferFull = true;
        }
        else
        {
            ++m_pos;
        }
    }
    // Number of valid samples currently in the ring buffer.
    int GetCount() const { return (m_bBufferFull) ? TIMER_SLOT_SIZE : m_pos; }
    // Mean of the buffered samples; 0 when disabled or before any sample
    // (guards the division by a zero count).
    double GetAverage(CounterType t) const
    {
        if (m_bDisabled)
            return 0;
        int count = GetCount();
        return (count == 0) ? 0 : m_data[t].total / count;
    }
    // Minimum observed value (DBL_MAX until the first sample arrives).
    double GetMin(CounterType t) const { return (m_bDisabled) ? 0 : m_data[t].min; }
    // Maximum observed value (0 until the first sample arrives).
    double GetMax(CounterType t) const { return (m_bDisabled) ? 0 : m_data[t].max; }
    // Raw sample at ring-buffer slot `index`; caller must keep index < GetCount().
    double GetValues(CounterType t, int index) const { return (m_bDisabled) ? 0 : m_data[t].measured[index]; }
    // Standard deviation of the buffered samples.
    double GetStdev(CounterType t) const { return (m_bDisabled) ? 0 : sqrt(GetVariance(t)); }
    // Population variance of the buffered samples; 0 when disabled or empty.
    double GetVariance(CounterType t) const
    {
        if (m_bDisabled)
            return 0;
        int count = GetCount();
        if (count == 0)
            return 0;
        double average = m_data[t].total / count;
        double var = 0;
        for (int i = 0; i < count; ++i)
        {
            var += (m_data[t].measured[i] - average) * (m_data[t].measured[i] - average);
        }
        return var / count;
    }
private:
    // Per-counter-type ring buffer plus running total and extrema.
    struct DataBlock
    {
        void Reset()
        {
            max = 0;
            min = DBL_MAX; // so the first sample always becomes the minimum
            total = 0;
            memset(measured, 0, sizeof(double)*TIMER_SLOT_SIZE);
        }
        double max;
        double min;
        double total;
        double measured[TIMER_SLOT_SIZE];
    };
    int m_pos;          // next slot to write in the ring buffer
    bool m_bBufferFull; // true once the buffer has wrapped at least once
    bool m_bDisabled;   // when true, every member function is a no-op
    Timer m_timer;
    CpuPerfCounter m_cpuCounter;
#ifndef DISABLE_GPU_COUNTERS
    GpuPerfCounter m_gpuCounter;
#endif
    DataBlock m_data[CounterType::TYPE_COUNT];
};
// Bundles one PerfCounterStatistics per interval of an enum T.
// T must expose a COUNT enumerator naming the number of intervals.
// To create a profiler, define the intervals in an enum and instantiate
// Profiler with it. See an example in engine/test/Model/ModelTest.cpp.
template<typename T>
class Profiler
{
public:
    // Resets the counters for intervals in the half-open range [begin, end).
    void Reset(int begin, int end)
    {
        for (int idx = begin; idx < end; ++idx)
        {
            m_perfCounterStat[idx].Reset();
        }
    }
    // Resets every interval's counter.
    void Reset()
    {
        Reset(0, T::COUNT);
    }
    // Access the counter for one interval.
    PerfCounterStatistics& GetCounter(int t)
    {
        return m_perfCounterStat[t];
    }
    PerfCounterStatistics& operator [] (int t)
    {
        return GetCounter(t);
    }
    // Enables measurement on all intervals.
    void Enable()
    {
        for (auto& counter : m_perfCounterStat)
        {
            counter.Enable();
        }
    }
    // Disables measurement on all intervals (counters become no-ops).
    void Disable()
    {
        for (auto& counter : m_perfCounterStat)
        {
            counter.Disable();
        }
    }
private:
    PerfCounterStatistics m_perfCounterStat[T::COUNT];
};
// Comment out this define to compile all profiling instrumentation away.
#define WINML_PROFILING
#ifdef WINML_PROFILING
// Bracket a measured interval: profiler is a Profiler<T>, interval a T value.
#define WINML_PROFILING_START(profiler, interval) profiler[interval].Start()
#define WINML_PROFILING_STOP(profiler, interval) profiler[interval].Stop()
#else
// No-op stubs; the do/while form safely swallows the trailing semicolon.
#define WINML_PROFILING_START(profiler, interval) do {} while(0)
#define WINML_PROFILING_STOP(profiler, interval) do {} while(0)
#endif

Просмотреть файл

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.27004.2005
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WinMLModelRunner", "WinMLModelRunner.vcxproj", "{81EA9CC6-8A26-4583-B1A4-84740EF815C8}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x64.ActiveCfg = Debug|x64
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x64.Build.0 = Debug|x64
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x86.ActiveCfg = Debug|Win32
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Debug|x86.Build.0 = Debug|Win32
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x64.ActiveCfg = Release|x64
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x64.Build.0 = Release|x64
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x86.ActiveCfg = Release|Win32
{81EA9CC6-8A26-4583-B1A4-84740EF815C8}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {D193B2D4-1FF5-4E14-9334-E5EF4C8F9069}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -0,0 +1,151 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClCompile Include="CommandLineArgs.cpp" />
<ClCompile Include="Main.cpp" />
<ClCompile Include="Stopwatch.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="BindingUtilities.h" />
<ClInclude Include="CommandLineArgs.h" />
<ClInclude Include="Common.h" />
<ClInclude Include="OutputHelper.h" />
<ClInclude Include="ModelBinding.h" />
<ClInclude Include="Stopwatch.h" />
<ClInclude Include="TimerHelper.h" />
</ItemGroup>
<PropertyGroup Label="Globals">
<CppWinRTEnabled>true</CppWinRTEnabled>
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{81EA9CC6-8A26-4583-B1A4-84740EF815C8}</ProjectGuid>
<RootNamespace>Benchmark</RootNamespace>
<WindowsTargetPlatformVersion>10.0.17713.0</WindowsTargetPlatformVersion>
<ProjectName>WinMLModelRunner</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<IntDir>$(Platform)\$(Configuration)\$(Benchmark)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ShowIncludes>true</ShowIncludes>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>dxgi.lib;d3d12.lib;winml.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<LanguageStandard>stdcpp17</LanguageStandard>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>dxgi.lib;d3d12.lib;windowsapp.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ShowIncludes>true</ShowIncludes>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>dxgi.lib;d3d12.lib;winml.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ShowIncludes>true</ShowIncludes>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>dxgi.lib;d3d12.lib;winml.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

Просмотреть файл

@ -0,0 +1,33 @@
The WinMLModelRunner program can run .onnx or .pb models where the input and output variables
are tensors. It allows you to run WinML on the GPU or CPU, and if neither are specified will
run the test multiple times to generate separate GPU and CPU performance measurements. The GPU,
CPU and wall-clock times for loading, binding, and evaluating and the CPU and GPU memory usage during
evaluate will print to the command line and to a CSV file.
If no CSV file name is specified, the program will create a CSV file titled
"WinML Model Run [Today's date].csv" in the same folder as the .exe file.
Command-Line Options:
---------------------------------------------------------------------------------------
Required command-line arguments (specify one of the following):
-model <path> : Path to a .onnx model file.
-folder <path> : Path to a folder with .onnx models, will run all of the models in the folder.
Optional command-line arguments:
-iterations <int> : Number of times to evaluate the model.
-CPU : Will create a session on the CPU.
-GPU : Will create a session on the GPU.
-csv <file name> : Will create a CSV file and output the performance measurements to it.
Examples:
---------------------------------------------------------------------------------------
Run 'concat' operator on the CPU and GPU separately 5 times:
> WinMLModelRunner.exe -model c:\\data\\concat.onnx -iterations 5
Run all the models in the data folder 3 times using only the CPU:
> WinMLModelRunner.exe -folder c:\\data -iterations 3 -CPU
Run all of the models in the data folder on the GPU and CPU once and output the
performance data to benchmarkdata.csv:
> WinMLModelRunner.exe -folder c:\\data -csv benchmarkdata.csv