* Add feature to bind GPU tensors

* Add test to check for invalid arg

* Spacing

* Make heap properties and resource desc simpler

* Move GPU tensor upload code under if statement
Ryan Lai 2019-04-15 16:38:21 -07:00 committed by GitHub
Parent ba5885c74e
Commit f2a21529f1
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 155 additions and 48 deletions
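
The feature itself: when -GPUBoundInput is combined with -tensor, WinMLRunner now uploads the tensor data into a D3D12 default-heap buffer and binds the resulting GPU tensor instead of a CPU one. A minimal sketch of the allocate-and-wrap pattern used below, assuming the CD3DX12_* helpers from d3dx12.h and the WinML native interop header (the CreateGpuTensor helper name is illustrative):

#include <cstdint>
#include <vector>
#include <d3d12.h>
#include "d3dx12.h"                                // CD3DX12_* helpers
#include <winrt/Windows.AI.MachineLearning.h>
#include "Windows.AI.Machinelearning.Native.h"     // ITensorStaticsNative

using winrt::com_ptr;
using winrt::Windows::AI::MachineLearning::TensorFloat;

// Sketch: allocate a default-heap buffer and wrap it as a WinML tensor.
// Error handling is reduced to check_hresult.
TensorFloat CreateGpuTensor(ID3D12Device* device, std::vector<int64_t> shape,
                            UINT64 sizeInBytes)
{
    // Default-heap buffer with UAV access, matching the flags the code below uses.
    com_ptr<ID3D12Resource> gpuResource;
    CD3DX12_HEAP_PROPERTIES heapProps(D3D12_HEAP_TYPE_DEFAULT);
    CD3DX12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(
        sizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
    winrt::check_hresult(device->CreateCommittedResource(
        &heapProps, D3D12_HEAP_FLAG_NONE, &bufferDesc,
        D3D12_RESOURCE_STATE_COMMON, nullptr,
        __uuidof(ID3D12Resource), gpuResource.put_void()));

    // Wrap the raw resource as a TensorFloat via the native interop factory.
    com_ptr<ITensorStaticsNative> factory =
        winrt::get_activation_factory<TensorFloat, ITensorStaticsNative>();
    com_ptr<::IUnknown> unk;
    winrt::check_hresult(factory->CreateFromD3D12Resource(
        gpuResource.get(), shape.data(), static_cast<int>(shape.size()), unk.put()));
    return unk.as<TensorFloat>();
}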

View file

@@ -284,7 +284,6 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
// We need to expect one more line because of the header
Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
}
TEST_METHOD(GarbageInputCpuWinMLDeviceCpuBoundRGBImage)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
@@ -320,7 +319,6 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
// We need to expect one more line because of the header
Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
}
TEST_METHOD(GarbageInputCpuWinMLDeviceGpuBoundRGBImage)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
@@ -344,19 +342,15 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
// We need to expect one more line because of the header
Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
}
TEST_METHOD(GarbageInputCpuWinMLDeviceGpuBoundTensor)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
const std::wstring command =
BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
L"-GPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" });
Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
// We need to expect one more line because of the header
Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
// Binding a GPU tensor with a session created on the CPU device isn't supported.
Assert::AreEqual(E_INVALIDARG, RunProc((wchar_t*)command.c_str()));
}
TEST_METHOD(GarbageInputGpuClientDeviceCpuBoundRGBImage)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
@@ -646,7 +640,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath });
Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), RunProc((wchar_t *)command.c_str()));
}
TEST_METHOD(ProvidedCSVInputGPUSaveTensor)
TEST_METHOD(ProvidedCSVInputGPUSaveCpuBoundTensor)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -656,7 +650,17 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));
}
TEST_METHOD(ProvidedCSVInputCPUSaveTensor)
TEST_METHOD(ProvidedCSVInputGPUSaveGpuBoundTensor)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,
L"-SaveTensorData", L"First", TENSOR_DATA_PATH, L"-GPU", L"-GPUBoundInput" });
Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));
}
TEST_METHOD(ProvidedCSVInputCPUSaveCpuBoundTensor)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -666,7 +670,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv",
TENSOR_DATA_PATH + L"\\softmaxout_1CpuIteration1.csv"));
}
TEST_METHOD(ProvidedCSVInputGPUSaveTensorFp16)
TEST_METHOD(ProvidedCSVInputGPUSaveCpuBoundTensorFp16)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet_fp16.onnx";
const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -676,7 +680,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_GPU.csv",
TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));
}
TEST_METHOD(ProvidedCSVInputCPUSaveTensorFp16)
TEST_METHOD(ProvidedCSVInputCPUSaveCpuBoundTensorFp16)
{
const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet_fp16.onnx";
const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -687,7 +691,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
TENSOR_DATA_PATH + L"\\softmaxout_1CpuIteration1.csv"));
}
TEST_METHOD(ProvidedCSVInputOnlyGpuSaveTensorImageDenotation)
TEST_METHOD(ProvidedCSVInputOnlyGpuSaveCpuBoundTensorImageDenotation)
{
const std::wstring modelPath = CURRENT_PATH + L"mnist.onnx";
const std::wstring inputPath = CURRENT_PATH + L"mnist_28.csv";
@@ -697,7 +701,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Mnist_8_input_GPU.csv",
TENSOR_DATA_PATH + L"\\Plus214_Output_0GpuIteration1.csv"));
}
TEST_METHOD(ProvidedCSVInputOnlyCpuSaveTensorImageDenotation)
TEST_METHOD(ProvidedCSVInputOnlyCpuSaveCpuBoundTensorImageDenotation)
{
const std::wstring modelPath = CURRENT_PATH + L"mnist.onnx";
const std::wstring inputPath = CURRENT_PATH + L"mnist_28.csv";

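The renamed and added tests above pin both directions: CPU-bound and GPU-bound tensors must save identical outputs, and a GPU-bound tensor against a CPU session must fail with E_INVALIDARG. Reduced to WinRT calls, the happy path the runner drives looks roughly like this (a hypothetical sketch; the helper name is illustrative, and the D3D12 resource behind gpuTensor must come from the device the session uses):

#include <winrt/Windows.AI.MachineLearning.h>

using namespace winrt::Windows::AI::MachineLearning;

// Sketch: evaluate a model with a GPU-bound input tensor.
LearningModelEvaluationResult EvaluateGpuBound(const LearningModel& model,
                                               const TensorFloat& gpuTensor)
{
    LearningModelDevice device(LearningModelDeviceKind::DirectX);
    LearningModelSession session(model, device);
    LearningModelBinding binding(session);
    binding.Bind(model.InputFeatures().GetAt(0).Name(), gpuTensor);
    return session.Evaluate(binding, L"GpuBoundRun");
}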
View file

@@ -31,7 +31,6 @@
<ClInclude Include="src/CommandLineArgs.h" />
<ClInclude Include="src/Common.h" />
<ClInclude Include="src/Filehelper.h" />
<ClInclude Include="src/ModelBinding.h" />
<ClInclude Include="src/OutputHelper.h" />
<ClInclude Include="src/Run.h" />
<ClInclude Include="src/TimerHelper.h" />
@@ -234,6 +233,7 @@
<LanguageStandard>stdcpp17</LanguageStandard>
<PrecompiledHeaderFile />
<PrecompiledHeaderOutputFile />
<AdditionalIncludeDirectories>..\..\Samples\CustomTensorization\CustomTensorization;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@@ -254,6 +254,7 @@
<PrecompiledHeaderFile />
<PrecompiledHeaderOutputFile />
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<AdditionalIncludeDirectories>..\..\Samples\CustomTensorization\CustomTensorization;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>

View file

@@ -36,9 +36,6 @@
<ClInclude Include="src/TypeHelper.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src/ModelBinding.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="src/Run.h">
<Filter>Header Files</Filter>
</ClInclude>

View file

@@ -3,7 +3,7 @@
#include <time.h>
#include "Common.h"
#include "Windows.AI.Machinelearning.Native.h"
#include "d3dx12.h"
using namespace winrt::Windows::Media;
using namespace winrt::Windows::Storage;
using namespace winrt::Windows::AI::MachineLearning;
@@ -327,7 +327,7 @@ namespace BindingUtilities
template <TensorKind T>
static ITensor CreateTensor(const CommandLineArgs& args, const std::vector<std::string>& tensorStringInput,
const IVectorView<int64_t>& tensorShape)
const IVectorView<int64_t>& tensorShape, const InputBindingType inputBindingType)
{
using TensorValue = typename TensorKindToValue<T>::Type;
using DataType = typename TensorKindToType<T>::Type;
@@ -372,11 +372,106 @@ namespace BindingUtilities
// Creating tensors for input images hasn't been implemented yet.
throw hresult_not_implemented(L"Creating tensors for input images hasn't been implemented yet!");
}
return tensorValue;
if (inputBindingType == InputBindingType::CPU)
{
return tensorValue;
}
else // GPU Tensor
{
com_ptr<ID3D12Resource> pGPUResource = nullptr;
try
{
// Create the D3D12 device.
com_ptr<ID3D12Device> pD3D12Device = nullptr;
D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device),
reinterpret_cast<void**>(&pD3D12Device));
pD3D12Device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(
actualSizeInBytes,
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
D3D12_RESOURCE_STATE_COMMON, nullptr,
__uuidof(ID3D12Resource), pGPUResource.put_void());
if (!args.IsGarbageInput())
{
com_ptr<ID3D12Resource> imageUploadHeap;
// Create the GPU upload buffer.
pD3D12Device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
__uuidof(ID3D12Resource), imageUploadHeap.put_void());
// Create the command queue.
com_ptr<ID3D12CommandQueue> dxQueue = nullptr;
D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};
commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue),
reinterpret_cast<void**>(&dxQueue));
com_ptr<ILearningModelDeviceFactoryNative> devicefactory =
get_activation_factory<LearningModelDevice, ILearningModelDeviceFactoryNative>();
com_ptr<::IUnknown> spUnk;
devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put());
// Create ID3D12GraphicsCommandList and Allocator
D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type;
com_ptr<ID3D12CommandAllocator> allocator;
com_ptr<ID3D12GraphicsCommandList> cmdList;
pD3D12Device->CreateCommandAllocator(queuetype, winrt::guid_of<ID3D12CommandAllocator>(),
allocator.put_void());
pD3D12Device->CreateCommandList(0, queuetype, allocator.get(), nullptr,
winrt::guid_of<ID3D12GraphicsCommandList>(), cmdList.put_void());
// Copy from CPU to GPU
D3D12_SUBRESOURCE_DATA CPUData = {};
CPUData.pData = actualData;
CPUData.RowPitch = actualSizeInBytes;
CPUData.SlicePitch = actualSizeInBytes;
UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData);
// Close the command list and execute it to begin the initial GPU setup.
cmdList->Close();
ID3D12CommandList* ppCommandLists[] = { cmdList.get() };
dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
// Create Event
HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
// Create Fence
Microsoft::WRL::ComPtr<ID3D12Fence> spDirectFence = nullptr;
THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf())));
// Queue a fence signal
THROW_IF_FAILED(dxQueue->Signal(spDirectFence.Get(), 1));
THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent));
// Wait for signal
DWORD retVal = WaitForSingleObject(directEvent, INFINITE);
if (retVal != WAIT_OBJECT_0)
{
THROW_IF_FAILED(E_UNEXPECTED);
}
}
}
catch (...)
{
std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl;
throw;
}
com_ptr<ITensorStaticsNative> tensorfactory = get_activation_factory<TensorValue, ITensorStaticsNative>();
com_ptr<::IUnknown> spUnkTensor;
tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), vecShape.data(), static_cast<int>(vecShape.size()), spUnkTensor.put());
TensorValue returnTensor(nullptr);
spUnkTensor.try_as(returnTensor);
return returnTensor;
}
}
// Binds tensors of floats, ints, and doubles from CSV data.
ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const CommandLineArgs& args)
ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const CommandLineArgs& args,
const InputBindingType inputBindingType)
{
std::vector<std::string> elementStrings;
if (!args.CsvPath().empty())
@@ -407,7 +502,7 @@ namespace BindingUtilities
std::vector<int64_t> shape = { 1, channels, imageFeatureDescriptor.Height(),
imageFeatureDescriptor.Width() };
IVectorView<int64_t> shapeVectorView = single_threaded_vector(std::move(shape)).GetView();
return CreateTensor<TensorKind::Float>(args, elementStrings, shapeVectorView);
return CreateTensor<TensorKind::Float>(args, elementStrings, shapeVectorView, inputBindingType);
}
auto tensorDescriptor = description.try_as<TensorFeatureDescriptor>();
@@ -422,57 +517,68 @@ namespace BindingUtilities
}
case TensorKind::Float:
{
return CreateTensor<TensorKind::Float>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::Float>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::Float16:
{
return CreateTensor<TensorKind::Float16>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::Float16>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::Double:
{
return CreateTensor<TensorKind::Double>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::Double>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::Int8:
{
return CreateTensor<TensorKind::Int8>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::Int8>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::UInt8:
{
return CreateTensor<TensorKind::UInt8>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::UInt8>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::Int16:
{
return CreateTensor<TensorKind::Int16>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::Int16>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::UInt16:
{
return CreateTensor<TensorKind::UInt16>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::UInt16>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::Int32:
{
return CreateTensor<TensorKind::Int32>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::Int32>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::UInt32:
{
return CreateTensor<TensorKind::UInt32>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::UInt32>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::Int64:
{
return CreateTensor<TensorKind::Int64>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::Int64>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
case TensorKind::UInt64:
{
return CreateTensor<TensorKind::UInt64>(args, elementStrings, tensorDescriptor.Shape());
return CreateTensor<TensorKind::UInt64>(args, elementStrings, tensorDescriptor.Shape(),
inputBindingType);
}
break;
}

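Before wrapping the buffer, the upload path above copies CPU data through an upload heap on a direct queue and then blocks until the copy finishes. Condensed to its core, the fence-and-event wait it performs looks like this (a sketch assuming valid device and queue pointers):

#include <d3d12.h>
#include <wrl/client.h>
#include <winrt/base.h>

// Block the CPU until the GPU has executed everything submitted to `queue`.
void WaitForQueue(ID3D12Device* device, ID3D12CommandQueue* queue)
{
    Microsoft::WRL::ComPtr<ID3D12Fence> fence;
    winrt::check_hresult(device->CreateFence(
        0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.ReleaseAndGetAddressOf())));

    // Auto-reset event that fires once the fence value reaches 1.
    HANDLE fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
    winrt::check_pointer(fenceEvent);

    winrt::check_hresult(queue->Signal(fence.Get(), 1));              // GPU-side signal
    winrt::check_hresult(fence->SetEventOnCompletion(1, fenceEvent)); // CPU-side hook

    DWORD waitResult = WaitForSingleObject(fenceEvent, INFINITE);
    CloseHandle(fenceEvent);
    if (waitResult != WAIT_OBJECT_0)
    {
        winrt::throw_hresult(E_UNEXPECTED);
    }
}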
View file

@@ -20,10 +20,10 @@ std::vector<ILearningModelFeatureValue> GenerateInputFeatures(const LearningMode
{
auto&& description = model.InputFeatures().GetAt(i);
if (inputDataType == InputDataType::Tensor || i > 0)
if (inputDataType == InputDataType::Tensor)
{
// If CSV data is provided, then every input will contain the same CSV data
auto tensorFeature = BindingUtilities::CreateBindableTensor(description, args);
auto tensorFeature = BindingUtilities::CreateBindableTensor(description, args, inputBindingType);
inputFeatures.push_back(tensorFeature);
}
else
@@ -214,6 +214,11 @@ HRESULT BindInputs(LearningModelBinding &context, const LearningModel& model, co
const IDirect3DDevice& winrtDevice, DeviceCreationLocation deviceCreationLocation, uint32_t iteration,
Profiler<WINML_MODEL_TEST_PERF>& profiler)
{
if (deviceType == DeviceType::CPU && inputDataType == InputDataType::Tensor && inputBindingType == InputBindingType::GPU)
{
std::cout << "Cannot create D3D12 device on client if CPU device type is selected." << std::endl;
return E_INVALIDARG;
}
bool useInputData = false;
bool isGarbageData = args.IsGarbageInput();
std::string completionString = "\n";
@@ -315,15 +320,6 @@ HRESULT CheckIfModelAndConfigurationsAreSupported(LearningModel& model, const st
}
}
}
// Creating D3D12 device on client doesn't make sense for CPU deviceType
if (deviceType == DeviceType::CPU && std::any_of(deviceCreationLocations.begin(), deviceCreationLocations.end(),
[](const DeviceCreationLocation deviceCreationLocation) {
return deviceCreationLocation == DeviceCreationLocation::UserD3DDevice; }))
{
std::cout << "Cannot create D3D12 device on client if CPU device type is selected." << std::endl;
return E_INVALIDARG;
}
return S_OK;
}
@@ -508,10 +504,10 @@ int run(CommandLineArgs& args, Profiler<WINML_MODEL_TEST_PERF>& profiler) try
for (auto deviceType : deviceTypes)
{
lastHr = CheckIfModelAndConfigurationsAreSupported(model, path, deviceType, inputDataTypes,
deviceCreationLocations);
deviceCreationLocations);
if (FAILED(lastHr))
{
continue;
continue;
}
for (auto deviceCreationLocation : deviceCreationLocations)
{
@@ -550,7 +546,10 @@ int run(CommandLineArgs& args, Profiler<WINML_MODEL_TEST_PERF>& profiler) try
LearningModelBinding context(session);
lastHr = BindInputs(context, model, session, output, deviceType, args, inputBindingType,
inputDataType, winrtDevice, deviceCreationLocation, i, profiler);
if (FAILED(lastHr))
{
break;
}
LearningModelEvaluationResult result = nullptr;
bool capture_perf = args.IsPerformanceCapture() || args.IsPerIterationCapture();
lastHr = EvaluateModel(result, model, context, session, args, output,