Preprocess works
This commit is contained in:
Родитель
a6426641cd
Коммит
35764d6c98
|
@ -38,17 +38,18 @@ namespace WinMLSamplesGallery.Samples
|
|||
|
||||
private async void LaunchWindow(object sender, RoutedEventArgs e)
|
||||
{
|
||||
Task.Run(() => WinMLSamplesGalleryNative.DXResourceBinding.LaunchWindow());
|
||||
//Task.Run(() => WinMLSamplesGalleryNative.DXResourceBinding.LaunchWindow());
|
||||
WinMLSamplesGalleryNative.DXResourceBinding.LaunchWindow();
|
||||
//WinMLSamplesGalleryNative.DXResourceBinding.EvalORT();
|
||||
System.Threading.Thread.Sleep(2000);
|
||||
//System.Threading.Thread.Sleep(2000);
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
float[] results = await Task.Run(() => WinMLSamplesGalleryNative.DXResourceBinding.EvalORT());
|
||||
UpdateClassification(results);
|
||||
System.Diagnostics.Debug.WriteLine("Updated ui with eval");
|
||||
//System.Threading.Thread.Sleep(10000);
|
||||
}
|
||||
//for (int i = 0; i < 10; i++)
|
||||
//{
|
||||
// float[] results = await Task.Run(() => WinMLSamplesGalleryNative.DXResourceBinding.EvalORT());
|
||||
// UpdateClassification(results);
|
||||
// System.Diagnostics.Debug.WriteLine("Updated ui with eval");
|
||||
// //System.Threading.Thread.Sleep(10000);
|
||||
//}
|
||||
|
||||
|
||||
|
||||
|
@ -60,10 +61,10 @@ namespace WinMLSamplesGallery.Samples
|
|||
//}
|
||||
}
|
||||
|
||||
private async Task<float[]> ClassifyFrame()
|
||||
{
|
||||
return WinMLSamplesGalleryNative.DXResourceBinding.EvalORT();
|
||||
}
|
||||
//private async Task<float[]> ClassifyFrame()
|
||||
//{
|
||||
// return WinMLSamplesGalleryNative.DXResourceBinding.EvalORT();
|
||||
//}
|
||||
|
||||
void UpdateClassification(float[] results)
|
||||
{
|
||||
|
|
|
@ -414,6 +414,265 @@ std::vector<float> EvalORTInference(const Ort::Value& prev_input) {
|
|||
|
||||
}
|
||||
|
||||
winrt::com_array<float> Preproces()
|
||||
{
|
||||
OutputDebugString(L"In Preprocess");
|
||||
// Squeezenet opset v7 https://github.com/onnx/models/blob/master/vision/classification/squeezenet/README.md
|
||||
//const wchar_t* modelFilePath = L"./squeezenet1.1-7.onnx";
|
||||
const wchar_t* modelFilePath = L"C:/Users/numform/Windows-Machine-Learning/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/squeezenet1.1-7.onnx";
|
||||
const char* modelInputTensorName = "data";
|
||||
const char* modelOutputTensorName = "squeezenet0_flatten0_reshape0";
|
||||
const char* preprocessModelInputTensorName = "Input";
|
||||
const char* preprocessModelOutputTensorName = "Output";
|
||||
// Might have to change the 3's below to 4 for rgba
|
||||
const std::array<int64_t, 4> preprocessInputShape = { 1, 512, 512, 4 };
|
||||
const std::array<int64_t, 4> preprocessOutputShape = { 1, 3, 224, 224 };
|
||||
|
||||
HRESULT hr;
|
||||
ID3D12Resource* new_buffer;
|
||||
ID3D12Resource* current_buffer;
|
||||
|
||||
D3D12_RESOURCE_DESC resourceDesc = {
|
||||
D3D12_RESOURCE_DIMENSION_BUFFER,
|
||||
0,
|
||||
static_cast<uint64_t>(800 * 600 * 3 * 4),
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
DXGI_FORMAT_UNKNOWN,
|
||||
{1, 0},
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
|
||||
};
|
||||
|
||||
const CD3DX12_HEAP_PROPERTIES default_heap(D3D12_HEAP_TYPE_DEFAULT);
|
||||
hr = device->CreateCommittedResource(
|
||||
&default_heap, // a default heap
|
||||
D3D12_HEAP_FLAG_NONE, // no flags
|
||||
&resourceDesc, // resource description for a buffer
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, // we will start this heap in the copy destination state since we will copy data
|
||||
// from the upload heap to this heap
|
||||
nullptr, // optimized clear value must be null for this type of resource. used for render targets and depth/stencil buffers
|
||||
IID_PPV_ARGS(&new_buffer));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
Running = false;
|
||||
//return false;
|
||||
}
|
||||
|
||||
hr = swapChain->GetBuffer(frameIndex, IID_PPV_ARGS(&current_buffer));
|
||||
auto buffer_desc = current_buffer->GetDesc();
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
OutputDebugString(L"Failed to get buffer");
|
||||
//return false;
|
||||
}
|
||||
|
||||
const auto barrier = CD3DX12_RESOURCE_BARRIER::Transition(current_buffer, D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
commandAllocator[frameIndex]->Reset();
|
||||
commandList->CopyResource(new_buffer, current_buffer);
|
||||
|
||||
auto new_buffer_desc = new_buffer->GetDesc();
|
||||
|
||||
|
||||
//commandList->CopyTextureRegion(new_buffer, 10, 20, 0, pSourceTexture);
|
||||
|
||||
//Try this if it doesn't work: width = width * height and height = 1
|
||||
|
||||
long newH = 224;
|
||||
long newW = 224;
|
||||
long h = 512;
|
||||
long w = 512;
|
||||
std::array<long, 6> center_fill_dimensions = CalculateCenterFillDimensions(h, w, newH, newW);
|
||||
long resizedW = center_fill_dimensions[0];
|
||||
long resizedH = center_fill_dimensions[1];
|
||||
long top = center_fill_dimensions[2];
|
||||
long bottom = center_fill_dimensions[3];
|
||||
long left = center_fill_dimensions[4];
|
||||
long right = center_fill_dimensions[5];
|
||||
winrt::hstring interpolationMode = L"nearest";
|
||||
long c = 3;
|
||||
|
||||
|
||||
//auto resize_op = LearningModelOperator(L"Resize")
|
||||
// .SetInput(L"X", L"Input")
|
||||
// .SetConstant(L"roi", TensorFloat::CreateFromIterable({ 8 }, { 0, 0, 0, 0, 1, 1, 1, 1 }))
|
||||
// .SetConstant(L"scales", TensorFloat::CreateFromIterable({ 4 }, { 1, (float)(1 + resizedH) / (float)h, (float)(1 + resizedH) / (float)h, 1 }))
|
||||
// .SetAttribute(L"mode", TensorString::CreateFromArray({}, { interpolationMode }))
|
||||
// .SetOutput(L"Y", L"ResizeOutput");
|
||||
|
||||
//auto slice_op = LearningModelOperator(L"Slice")
|
||||
// .SetInput(L"data", L"ResizeOutput")
|
||||
// .SetConstant(L"starts", TensorInt64Bit::CreateFromIterable({ 4 }, { 0, top, left, 0 }))
|
||||
// .SetConstant(L"ends", TensorInt64Bit::CreateFromIterable({ 4 }, { LLONG_MAX, bottom, right, 3 }))
|
||||
// .SetOutput(L"output", L"SliceOutput");
|
||||
|
||||
//auto dimension_transpose = LearningModelOperator(L"Transpose")
|
||||
// .SetInput(L"data", L"SliceOutput")
|
||||
// .SetAttribute(L"perm", TensorInt64Bit::CreateFromArray({ 4 }, { INT64(0), INT64(3), INT64(1), INT64(2)}))
|
||||
// .SetOutput(L"transposed", L"Output");
|
||||
|
||||
//auto preprocessingModelBuilder =
|
||||
// LearningModelBuilder::Create(12)
|
||||
// .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, preprocessInputShape))
|
||||
// .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, preprocessOutputShape))
|
||||
// .Operators().Add(resize_op)
|
||||
// .Operators().Add(slice_op)
|
||||
// .Operators().Add(dimension_transpose);
|
||||
//auto preprocessingModel = preprocessingModelBuilder.CreateModel();
|
||||
|
||||
//preprocessingModelBuilder.Save(L"C:/Users/numform/Windows-Machine-Learning/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/dx_preprocessor.onnx");
|
||||
const wchar_t* preprocessingModelFilePath = L"C:/Users/numform/Windows-Machine-Learning/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/dx_preprocessor.onnx";
|
||||
|
||||
const bool passTensorsAsD3DResources = true;
|
||||
|
||||
LARGE_INTEGER startTime;
|
||||
LARGE_INTEGER d3dDeviceCreationTime;
|
||||
LARGE_INTEGER sessionCreationTime;
|
||||
LARGE_INTEGER tensorCreationTime;
|
||||
LARGE_INTEGER bindingTime;
|
||||
LARGE_INTEGER runTime;
|
||||
LARGE_INTEGER synchronizeOutputsTime;
|
||||
LARGE_INTEGER cpuFrequency;
|
||||
QueryPerformanceFrequency(&cpuFrequency);
|
||||
QueryPerformanceCounter(&startTime);
|
||||
|
||||
try
|
||||
{
|
||||
ComPtr<ID3D12Device> d3d12Device;
|
||||
THROW_IF_FAILED(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&d3d12Device)));
|
||||
QueryPerformanceCounter(&d3dDeviceCreationTime);
|
||||
|
||||
OrtApi const& ortApi = Ort::GetApi(); // Uses ORT_API_VERSION
|
||||
const OrtDmlApi* ortDmlApi;
|
||||
THROW_IF_NOT_OK(ortApi.GetExecutionProviderApi("DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ortDmlApi)));
|
||||
|
||||
// ONNX Runtime setup
|
||||
Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "DirectML_Direct3D_TensorAllocation_Test");
|
||||
Ort::SessionOptions sessionOptions;
|
||||
sessionOptions.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
|
||||
sessionOptions.DisableMemPattern();
|
||||
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
|
||||
ortApi.AddFreeDimensionOverrideByName(sessionOptions, "batch_size", 1);
|
||||
OrtSessionOptionsAppendExecutionProvider_DML(sessionOptions, 0);
|
||||
//Ort::Session session = Ort::Session(ortEnvironment, modelFilePath, sessionOptions);
|
||||
Ort::Session session = Ort::Session(ortEnvironment, preprocessingModelFilePath, sessionOptions);
|
||||
|
||||
QueryPerformanceCounter(&sessionCreationTime);
|
||||
|
||||
Ort::IoBinding ioBinding = Ort::IoBinding::IoBinding(session);
|
||||
const char* memoryInformationName = passTensorsAsD3DResources ? "DML" : "Cpu";
|
||||
Ort::MemoryInfo memoryInformation(memoryInformationName, OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault);
|
||||
// Not needed: Ort::Allocator allocator(session, memoryInformation);
|
||||
|
||||
// Create input tensor.
|
||||
//Ort::Value inputTensor(nullptr);
|
||||
//std::vector<float> inputTensorValues(static_cast<size_t>(GetElementCount(inferenceInputShape)), 0.0f);
|
||||
//std::iota(inputTensorValues.begin(), inputTensorValues.end(), 0.0f);
|
||||
ComPtr<IUnknown> inputTensorEpWrapper;
|
||||
|
||||
//Ort::Value inputTensor(nullptr);
|
||||
//std::vector<float> inputTensorValues(static_cast<size_t>(GetElementCount(preprocessInputShape)), 0.0f);
|
||||
//std::iota(inputTensorValues.begin(), inputTensorValues.end(), 0.0f);
|
||||
//Microsoft::WRL::ComPtr<IUnknown> inputTensorEpWrapper;
|
||||
|
||||
//// Create empty D3D resource for input.
|
||||
//inputTensor = CreateTensorValueUsingD3DResource(
|
||||
// d3d12Device.Get(),
|
||||
// *ortDmlApi,
|
||||
// memoryInformation,
|
||||
// preprocessInputShape,
|
||||
// ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
|
||||
// sizeof(float),
|
||||
// /*out*/ IID_PPV_ARGS_Helper(inputTensorEpWrapper.GetAddressOf())
|
||||
//);
|
||||
|
||||
/* Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> to_cpy;
|
||||
RETURN_IF_FAILED((
|
||||
create_resource_barrier_command_list<D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE>(
|
||||
d3d12Device.Get(),
|
||||
commandQueue,
|
||||
commandAllocator,
|
||||
position_buffer_.Get(),
|
||||
&to_cpy)));
|
||||
|
||||
ID3D12CommandList* const to_cpy_list[] = {
|
||||
to_cpy.Get()
|
||||
};*/
|
||||
|
||||
//commandQueue->ExecuteCommandLists(_countof(to_cpy_list), to_cpy_list);
|
||||
|
||||
Ort::Value inputTensor = CreateTensorValueFromRTVResource(
|
||||
*ortDmlApi,
|
||||
memoryInformation,
|
||||
new_buffer,
|
||||
preprocessInputShape,
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
|
||||
/*out*/ IID_PPV_ARGS_Helper(inputTensorEpWrapper.GetAddressOf())
|
||||
);
|
||||
|
||||
// Create output tensor on device memory.
|
||||
//Ort::Value outputTensor(nullptr);
|
||||
//std::vector<float> outputTensorValues(static_cast<size_t>(GetElementCount(inferenceOutputShape)), 0.0f);
|
||||
//Microsoft::WRL::ComPtr<IUnknown> outputTensorEpWrapper;
|
||||
|
||||
Ort::Value outputTensor(nullptr);
|
||||
std::vector<float> outputTensorValues(static_cast<size_t>(GetElementCount(preprocessOutputShape)), 0.0f);
|
||||
ComPtr<IUnknown> outputTensorEpWrapper;
|
||||
|
||||
outputTensor = CreateTensorValueUsingD3DResource(
|
||||
d3d12Device.Get(),
|
||||
*ortDmlApi,
|
||||
memoryInformation,
|
||||
preprocessOutputShape,
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
|
||||
sizeof(float),
|
||||
/*out*/ IID_PPV_ARGS_Helper(outputTensorEpWrapper.GetAddressOf())
|
||||
);
|
||||
|
||||
QueryPerformanceCounter(&tensorCreationTime);
|
||||
|
||||
////////////////////////////////////////
|
||||
// Bind the tensor inputs to the model, and run it.
|
||||
ioBinding.BindInput(preprocessModelInputTensorName, inputTensor);
|
||||
ioBinding.BindOutput(preprocessModelOutputTensorName, outputTensor);
|
||||
ioBinding.SynchronizeInputs();
|
||||
QueryPerformanceCounter(&bindingTime);
|
||||
|
||||
Ort::RunOptions runOptions;
|
||||
|
||||
// TODO: Upload inputTensorValues to GPU inputTensor.
|
||||
|
||||
printf("Beginning execution.\n");
|
||||
printf("Running Session.\n");
|
||||
session.Run(runOptions, ioBinding);
|
||||
OutputDebugString(L"Done evaluating preprocessing session");
|
||||
//ioBinding.SynchronizeOutputs();
|
||||
QueryPerformanceCounter(&synchronizeOutputsTime);
|
||||
|
||||
|
||||
auto eval_results_std = EvalORTInference(outputTensor);
|
||||
winrt::com_array<float> eval_results(1000);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
eval_results[i] = eval_results_std[i];
|
||||
}
|
||||
return eval_results;
|
||||
}
|
||||
catch (Ort::Exception const& exception)
|
||||
{
|
||||
printf("Error running model inference: %s\n", exception.what());
|
||||
//return EXIT_FAILURE;
|
||||
}
|
||||
catch (std::exception const& exception)
|
||||
{
|
||||
printf("Error running model inference: %s\n", exception.what());
|
||||
//return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static HMODULE GetCurrentModule()
|
||||
{ // NB: XP+ solution!
|
||||
HMODULE hModule = NULL;
|
||||
|
@ -428,272 +687,16 @@ static HMODULE GetCurrentModule()
|
|||
namespace winrt::WinMLSamplesGalleryNative::implementation
|
||||
{
|
||||
int DXResourceBinding::LaunchWindow() {
|
||||
OutputDebugString(L"Will this output work?");
|
||||
OutputDebugString(L"In Launch Window\n");
|
||||
//LaunchNewWindow();
|
||||
HINSTANCE hInstance = GetCurrentModule();
|
||||
StartHWind(hInstance, 10);
|
||||
std::thread hwnd_th(StartHWind, hInstance, 10);
|
||||
hwnd_th.detach();
|
||||
Sleep(2000);
|
||||
|
||||
Preproces();
|
||||
return 0;
|
||||
}
|
||||
|
||||
winrt::com_array<float> DXResourceBinding::EvalORT()
|
||||
{
|
||||
OutputDebugString(L"In EvalORT");
|
||||
// Squeezenet opset v7 https://github.com/onnx/models/blob/master/vision/classification/squeezenet/README.md
|
||||
//const wchar_t* modelFilePath = L"./squeezenet1.1-7.onnx";
|
||||
const wchar_t* modelFilePath = L"C:/Users/numform/Windows-Machine-Learning/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/squeezenet1.1-7.onnx";
|
||||
const char* modelInputTensorName = "data";
|
||||
const char* modelOutputTensorName = "squeezenet0_flatten0_reshape0";
|
||||
const char* preprocessModelInputTensorName = "Input";
|
||||
const char* preprocessModelOutputTensorName = "Output";
|
||||
// Might have to change the 3's below to 4 for rgba
|
||||
const std::array<int64_t, 4> preprocessInputShape = { 1, 512, 512, 4 };
|
||||
const std::array<int64_t, 4> preprocessOutputShape = { 1, 3, 224, 224 };
|
||||
|
||||
HRESULT hr;
|
||||
ID3D12Resource* new_buffer;
|
||||
ID3D12Resource* current_buffer;
|
||||
|
||||
D3D12_RESOURCE_DESC resourceDesc = {
|
||||
D3D12_RESOURCE_DIMENSION_BUFFER,
|
||||
0,
|
||||
static_cast<uint64_t>(800 * 600 * 3 * 4),
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
DXGI_FORMAT_UNKNOWN,
|
||||
{1, 0},
|
||||
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
|
||||
};
|
||||
|
||||
const CD3DX12_HEAP_PROPERTIES default_heap(D3D12_HEAP_TYPE_DEFAULT);
|
||||
hr = device->CreateCommittedResource(
|
||||
&default_heap, // a default heap
|
||||
D3D12_HEAP_FLAG_NONE, // no flags
|
||||
&resourceDesc, // resource description for a buffer
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, // we will start this heap in the copy destination state since we will copy data
|
||||
// from the upload heap to this heap
|
||||
nullptr, // optimized clear value must be null for this type of resource. used for render targets and depth/stencil buffers
|
||||
IID_PPV_ARGS(&new_buffer));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
Running = false;
|
||||
//return false;
|
||||
}
|
||||
|
||||
hr = swapChain->GetBuffer(frameIndex, IID_PPV_ARGS(&current_buffer));
|
||||
auto buffer_desc = current_buffer->GetDesc();
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
OutputDebugString(L"Failed to get buffer");
|
||||
//return false;
|
||||
}
|
||||
|
||||
const auto barrier = CD3DX12_RESOURCE_BARRIER::Transition(current_buffer, D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
commandAllocator[frameIndex]->Reset();
|
||||
commandList->CopyResource(new_buffer, current_buffer);
|
||||
|
||||
auto new_buffer_desc = new_buffer->GetDesc();
|
||||
|
||||
|
||||
//commandList->CopyTextureRegion(new_buffer, 10, 20, 0, pSourceTexture);
|
||||
|
||||
//Try this if it doesn't work: width = width * height and height = 1
|
||||
|
||||
long newH = 224;
|
||||
long newW = 224;
|
||||
long h = 512;
|
||||
long w = 512;
|
||||
std::array<long, 6> center_fill_dimensions = CalculateCenterFillDimensions(h, w, newH, newW);
|
||||
long resizedW = center_fill_dimensions[0];
|
||||
long resizedH = center_fill_dimensions[1];
|
||||
long top = center_fill_dimensions[2];
|
||||
long bottom = center_fill_dimensions[3];
|
||||
long left = center_fill_dimensions[4];
|
||||
long right = center_fill_dimensions[5];
|
||||
winrt::hstring interpolationMode = L"nearest";
|
||||
long c = 3;
|
||||
|
||||
|
||||
//auto resize_op = LearningModelOperator(L"Resize")
|
||||
// .SetInput(L"X", L"Input")
|
||||
// .SetConstant(L"roi", TensorFloat::CreateFromIterable({ 8 }, { 0, 0, 0, 0, 1, 1, 1, 1 }))
|
||||
// .SetConstant(L"scales", TensorFloat::CreateFromIterable({ 4 }, { 1, (float)(1 + resizedH) / (float)h, (float)(1 + resizedH) / (float)h, 1 }))
|
||||
// .SetAttribute(L"mode", TensorString::CreateFromArray({}, { interpolationMode }))
|
||||
// .SetOutput(L"Y", L"ResizeOutput");
|
||||
|
||||
//auto slice_op = LearningModelOperator(L"Slice")
|
||||
// .SetInput(L"data", L"ResizeOutput")
|
||||
// .SetConstant(L"starts", TensorInt64Bit::CreateFromIterable({ 4 }, { 0, top, left, 0 }))
|
||||
// .SetConstant(L"ends", TensorInt64Bit::CreateFromIterable({ 4 }, { LLONG_MAX, bottom, right, 3 }))
|
||||
// .SetOutput(L"output", L"SliceOutput");
|
||||
|
||||
//auto dimension_transpose = LearningModelOperator(L"Transpose")
|
||||
// .SetInput(L"data", L"SliceOutput")
|
||||
// .SetAttribute(L"perm", TensorInt64Bit::CreateFromArray({ 4 }, { INT64(0), INT64(3), INT64(1), INT64(2)}))
|
||||
// .SetOutput(L"transposed", L"Output");
|
||||
|
||||
//auto preprocessingModelBuilder =
|
||||
// LearningModelBuilder::Create(12)
|
||||
// .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, preprocessInputShape))
|
||||
// .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, preprocessOutputShape))
|
||||
// .Operators().Add(resize_op)
|
||||
// .Operators().Add(slice_op)
|
||||
// .Operators().Add(dimension_transpose);
|
||||
//auto preprocessingModel = preprocessingModelBuilder.CreateModel();
|
||||
|
||||
//preprocessingModelBuilder.Save(L"C:/Users/numform/Windows-Machine-Learning/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/dx_preprocessor.onnx");
|
||||
const wchar_t* preprocessingModelFilePath = L"C:/Users/numform/Windows-Machine-Learning/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/dx_preprocessor.onnx";
|
||||
|
||||
const bool passTensorsAsD3DResources = true;
|
||||
|
||||
LARGE_INTEGER startTime;
|
||||
LARGE_INTEGER d3dDeviceCreationTime;
|
||||
LARGE_INTEGER sessionCreationTime;
|
||||
LARGE_INTEGER tensorCreationTime;
|
||||
LARGE_INTEGER bindingTime;
|
||||
LARGE_INTEGER runTime;
|
||||
LARGE_INTEGER synchronizeOutputsTime;
|
||||
LARGE_INTEGER cpuFrequency;
|
||||
QueryPerformanceFrequency(&cpuFrequency);
|
||||
QueryPerformanceCounter(&startTime);
|
||||
|
||||
try
|
||||
{
|
||||
ComPtr<ID3D12Device> d3d12Device;
|
||||
THROW_IF_FAILED(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&d3d12Device)));
|
||||
QueryPerformanceCounter(&d3dDeviceCreationTime);
|
||||
|
||||
OrtApi const& ortApi = Ort::GetApi(); // Uses ORT_API_VERSION
|
||||
const OrtDmlApi* ortDmlApi;
|
||||
THROW_IF_NOT_OK(ortApi.GetExecutionProviderApi("DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ortDmlApi)));
|
||||
|
||||
// ONNX Runtime setup
|
||||
Ort::Env ortEnvironment(ORT_LOGGING_LEVEL_WARNING, "DirectML_Direct3D_TensorAllocation_Test");
|
||||
Ort::SessionOptions sessionOptions;
|
||||
sessionOptions.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
|
||||
sessionOptions.DisableMemPattern();
|
||||
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
|
||||
ortApi.AddFreeDimensionOverrideByName(sessionOptions, "batch_size", 1);
|
||||
OrtSessionOptionsAppendExecutionProvider_DML(sessionOptions, 0);
|
||||
//Ort::Session session = Ort::Session(ortEnvironment, modelFilePath, sessionOptions);
|
||||
Ort::Session session = Ort::Session(ortEnvironment, preprocessingModelFilePath, sessionOptions);
|
||||
|
||||
QueryPerformanceCounter(&sessionCreationTime);
|
||||
|
||||
Ort::IoBinding ioBinding = Ort::IoBinding::IoBinding(session);
|
||||
const char* memoryInformationName = passTensorsAsD3DResources ? "DML" : "Cpu";
|
||||
Ort::MemoryInfo memoryInformation(memoryInformationName, OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault);
|
||||
// Not needed: Ort::Allocator allocator(session, memoryInformation);
|
||||
|
||||
// Create input tensor.
|
||||
//Ort::Value inputTensor(nullptr);
|
||||
//std::vector<float> inputTensorValues(static_cast<size_t>(GetElementCount(inferenceInputShape)), 0.0f);
|
||||
//std::iota(inputTensorValues.begin(), inputTensorValues.end(), 0.0f);
|
||||
ComPtr<IUnknown> inputTensorEpWrapper;
|
||||
|
||||
//Ort::Value inputTensor(nullptr);
|
||||
//std::vector<float> inputTensorValues(static_cast<size_t>(GetElementCount(preprocessInputShape)), 0.0f);
|
||||
//std::iota(inputTensorValues.begin(), inputTensorValues.end(), 0.0f);
|
||||
//Microsoft::WRL::ComPtr<IUnknown> inputTensorEpWrapper;
|
||||
|
||||
//// Create empty D3D resource for input.
|
||||
//inputTensor = CreateTensorValueUsingD3DResource(
|
||||
// d3d12Device.Get(),
|
||||
// *ortDmlApi,
|
||||
// memoryInformation,
|
||||
// preprocessInputShape,
|
||||
// ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
|
||||
// sizeof(float),
|
||||
// /*out*/ IID_PPV_ARGS_Helper(inputTensorEpWrapper.GetAddressOf())
|
||||
//);
|
||||
|
||||
/* Microsoft::WRL::ComPtr<ID3D12GraphicsCommandList> to_cpy;
|
||||
RETURN_IF_FAILED((
|
||||
create_resource_barrier_command_list<D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE>(
|
||||
d3d12Device.Get(),
|
||||
commandQueue,
|
||||
commandAllocator,
|
||||
position_buffer_.Get(),
|
||||
&to_cpy)));
|
||||
|
||||
ID3D12CommandList* const to_cpy_list[] = {
|
||||
to_cpy.Get()
|
||||
};*/
|
||||
|
||||
//commandQueue->ExecuteCommandLists(_countof(to_cpy_list), to_cpy_list);
|
||||
|
||||
Ort::Value inputTensor = CreateTensorValueFromRTVResource(
|
||||
*ortDmlApi,
|
||||
memoryInformation,
|
||||
new_buffer,
|
||||
preprocessInputShape,
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
|
||||
/*out*/ IID_PPV_ARGS_Helper(inputTensorEpWrapper.GetAddressOf())
|
||||
);
|
||||
|
||||
// Create output tensor on device memory.
|
||||
//Ort::Value outputTensor(nullptr);
|
||||
//std::vector<float> outputTensorValues(static_cast<size_t>(GetElementCount(inferenceOutputShape)), 0.0f);
|
||||
//Microsoft::WRL::ComPtr<IUnknown> outputTensorEpWrapper;
|
||||
|
||||
Ort::Value outputTensor(nullptr);
|
||||
std::vector<float> outputTensorValues(static_cast<size_t>(GetElementCount(preprocessOutputShape)), 0.0f);
|
||||
ComPtr<IUnknown> outputTensorEpWrapper;
|
||||
|
||||
outputTensor = CreateTensorValueUsingD3DResource(
|
||||
d3d12Device.Get(),
|
||||
*ortDmlApi,
|
||||
memoryInformation,
|
||||
preprocessOutputShape,
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
|
||||
sizeof(float),
|
||||
/*out*/ IID_PPV_ARGS_Helper(outputTensorEpWrapper.GetAddressOf())
|
||||
);
|
||||
|
||||
QueryPerformanceCounter(&tensorCreationTime);
|
||||
|
||||
////////////////////////////////////////
|
||||
// Bind the tensor inputs to the model, and run it.
|
||||
ioBinding.BindInput(preprocessModelInputTensorName, inputTensor);
|
||||
ioBinding.BindOutput(preprocessModelOutputTensorName, outputTensor);
|
||||
ioBinding.SynchronizeInputs();
|
||||
QueryPerformanceCounter(&bindingTime);
|
||||
|
||||
Ort::RunOptions runOptions;
|
||||
|
||||
// TODO: Upload inputTensorValues to GPU inputTensor.
|
||||
|
||||
printf("Beginning execution.\n");
|
||||
printf("Running Session.\n");
|
||||
session.Run(runOptions, ioBinding);
|
||||
OutputDebugString(L"Done evaluating preprocessing session");
|
||||
//ioBinding.SynchronizeOutputs();
|
||||
QueryPerformanceCounter(&synchronizeOutputsTime);
|
||||
|
||||
|
||||
auto eval_results_std = EvalORTInference(outputTensor);
|
||||
com_array<float> eval_results(1000);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
eval_results[i] = eval_results_std[i];
|
||||
}
|
||||
return eval_results;
|
||||
}
|
||||
catch (Ort::Exception const& exception)
|
||||
{
|
||||
printf("Error running model inference: %s\n", exception.what());
|
||||
//return EXIT_FAILURE;
|
||||
}
|
||||
catch (std::exception const& exception)
|
||||
{
|
||||
printf("Error running model inference: %s\n", exception.what());
|
||||
//return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int WINAPI StartHWind(HINSTANCE hInstance, //Main windows function
|
||||
|
|
|
@ -7,7 +7,7 @@ namespace winrt::WinMLSamplesGalleryNative::implementation
|
|||
{
|
||||
DXResourceBinding() = default;
|
||||
static int LaunchWindow();
|
||||
static winrt::com_array<float> EvalORT();
|
||||
//static winrt::com_array<float> EvalORT();
|
||||
};
|
||||
}
|
||||
namespace winrt::WinMLSamplesGalleryNative::factory_implementation
|
||||
|
|
|
@ -28,6 +28,6 @@ namespace WinMLSamplesGalleryNative
|
|||
runtimeclass DXResourceBinding
|
||||
{
|
||||
static Int32 LaunchWindow();
|
||||
static float[] EvalORT();
|
||||
//static float[] EvalORT();
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче