try to get a gpu capture
Parent: 61a595f507
Commit: 9a53abdf3c
@@ -144,7 +144,7 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Mfplat.lib;Mf.lib;Mfcore.lib;evr.lib;mfuuid.lib;powrprof.lib;d3d11.lib;mf.lib;mfplat.lib;shlwapi.lib;Comctl32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Mfplat.lib;Mf.lib;Mfcore.lib;evr.lib;mfuuid.lib;powrprof.lib;d3d11.lib;mf.lib;mfplat.lib;shlwapi.lib;Comctl32.lib;DXGI.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
@@ -199,6 +199,7 @@
<Import Project="..\packages\Microsoft.Windows.CppWinRT.2.0.211028.7\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('..\packages\Microsoft.Windows.CppWinRT.2.0.211028.7\build\native\Microsoft.Windows.CppWinRT.targets')" />
<Import Project="..\packages\Microsoft.AI.DirectML.1.8.0\build\Microsoft.AI.DirectML.targets" Condition="Exists('..\packages\Microsoft.AI.DirectML.1.8.0\build\Microsoft.AI.DirectML.targets')" />
<Import Project="..\packages\Microsoft.AI.MachineLearning.1.10.0\build\native\Microsoft.AI.MachineLearning.targets" Condition="Exists('..\packages\Microsoft.AI.MachineLearning.1.10.0\build\native\Microsoft.AI.MachineLearning.targets')" />
<Import Project="..\packages\WinPixEventRuntime.1.0.220124001\build\WinPixEventRuntime.targets" Condition="Exists('..\packages\WinPixEventRuntime.1.0.220124001\build\WinPixEventRuntime.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
@@ -210,6 +211,7 @@
<Error Condition="!Exists('..\packages\Microsoft.AI.DirectML.1.8.0\build\Microsoft.AI.DirectML.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.AI.DirectML.1.8.0\build\Microsoft.AI.DirectML.targets'))" />
<Error Condition="!Exists('..\packages\Microsoft.AI.MachineLearning.1.10.0\build\native\Microsoft.AI.MachineLearning.props')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.AI.MachineLearning.1.10.0\build\native\Microsoft.AI.MachineLearning.props'))" />
<Error Condition="!Exists('..\packages\Microsoft.AI.MachineLearning.1.10.0\build\native\Microsoft.AI.MachineLearning.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.AI.MachineLearning.1.10.0\build\native\Microsoft.AI.MachineLearning.targets'))" />
<Error Condition="!Exists('..\packages\WinPixEventRuntime.1.0.220124001\build\WinPixEventRuntime.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\WinPixEventRuntime.1.0.220124001\build\WinPixEventRuntime.targets'))" />
</Target>
<Target Name="DownloadContentFiles" BeforeTargets="Build">
<DownloadFile Condition="!Exists('$(MSBuildProjectDirectory)\Assets\fcn-resnet50-11.onnx')" SourceUrl="https://github.com/onnx/models/raw/master/vision/object_detection_segmentation/fcn/model/fcn-resnet50-11.onnx" DestinationFolder="$(MSBuildProjectDirectory)\Assets">
@@ -38,6 +38,18 @@ enum OnnxDataType : long {
ONNX_BFLOAT16 = 16,
}OnnxDataType;

interface DECLSPEC_UUID("9f251514-9d4d-4902-9d60-18988ab7d4b5") DECLSPEC_NOVTABLE
IDXGraphicsAnalysis : public IUnknown
{
    STDMETHOD_(void, BeginCapture)() PURE;
    STDMETHOD_(void, EndCapture)() PURE;
};
IDXGraphicsAnalysis* pGraphicsAnalysis;

// TODO: Probably don't need to be globals
std::array<float, 3> mean = { 0.485f, 0.456f, 0.406f };
std::array<float, 3> stddev = { 0.229f, 0.224f, 0.225f };
@@ -83,6 +95,8 @@ void BackgroundBlur::SetModels(int w, int h)
w /= g_scale; h /= g_scale;
SetImageSize(w, h);

HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast<void**>(&pGraphicsAnalysis));

m_sessionPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
m_sessionPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1));
// Named dim override of FCN-Resnet to unlock optimizations for a fixed input size
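DXGIGetDebugInterface1 only succeeds when the app is launched from PIX with GPU capture attached, so pGraphicsAnalysis stays null in a normal run. A minimal sketch of the guarded acquisition and capture scope (the null checks and the debug message are assumptions, not part of this commit):

// Acquire the PIX programmatic-capture interface; failure is expected outside PIX.
HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast<void**>(&pGraphicsAnalysis));
if (FAILED(getAnalysis))
{
    pGraphicsAnalysis = nullptr;
    OutputDebugString(L"PIX GPU capture not attached; skipping programmatic capture.\n");
}

// Later, wrap only the GPU work of interest:
if (pGraphicsAnalysis) pGraphicsAnalysis->BeginCapture();
// ... preprocess, model evaluation, postprocess ...
if (pGraphicsAnalysis) pGraphicsAnalysis->EndCapture();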
@@ -109,28 +123,51 @@ LearningModel BackgroundBlur::GetModel()
}
void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
{
// TODO: Lock so only one call to each IStreamModel at a time?
if (m_evalResult == nullptr || m_evalResult.Status() != Windows::Foundation::AsyncStatus::Started) {
VideoFrame outVideoFrame=NULL;
{
std::lock_guard<std::mutex> guard{ Processing };
VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
SetVideoFrames(inVideoFrame, outVideoFrame);
m_evalResult = RunAsync();
pGraphicsAnalysis->BeginCapture();
assert(m_session.Device().AdapterId() == nvidia);
VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
SetVideoFrames(inVideoFrame, outVideoFrame);
}

m_evalResult.Completed([&](auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) {
// Ensure only one call to copy out at a time
std::lock_guard<std::mutex> guard{ Processing };
OutputDebugString(L"Eval Completed");
// StyleTransferEffect copies to a member outputCache video frame and then copies to output outside of first condition
m_outputVideoFrame.CopyToAsync(outVideoFrame).get(); // TODO: Still threading bug here methinks
m_inputVideoFrame.Close();
m_outputVideoFrame.Close();
});
}
// Shape validation
assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);

assert(m_sessionPreprocess.Device().AdapterId() == nvidia);
assert(m_sessionPostprocess.Device().AdapterId() == nvidia);

// 2. Preprocessing: z-score normalization
std::vector<int64_t> shape = { 1, 3, m_imageHeightInPixels, m_imageWidthInPixels };
ITensor intermediateTensor = TensorFloat::Create(shape);
hstring inputName = m_sessionPreprocess.Model().InputFeatures().GetAt(0).Name();
hstring outputName = m_sessionPreprocess.Model().OutputFeatures().GetAt(0).Name();

m_bindingPreprocess.Bind(inputName, m_inputVideoFrame);
outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
m_bindingPreprocess.Bind(outputName, intermediateTensor, outputBindProperties);
m_sessionPreprocess.EvaluateAsync(m_bindingPreprocess, L"");

// 3. Run through actual model
std::vector<int64_t> FCNResnetOutputShape = { 1, 21, m_imageHeightInPixels, m_imageWidthInPixels };
ITensor FCNResnetOutput = TensorFloat::Create(FCNResnetOutputShape);

m_binding.Bind(m_session.Model().InputFeatures().GetAt(0).Name(), intermediateTensor);
m_binding.Bind(m_session.Model().OutputFeatures().GetAt(0).Name(), FCNResnetOutput, outputBindProperties);
m_session.EvaluateAsync(m_binding, L"");

// Shape validation
assert(m_outputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
assert(m_outputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);

// 4. Postprocessing
outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(false));
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(0).Name(), m_inputVideoFrame); // InputImage
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame);
// TODO: Make this async as well, and add a Completed handler
m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"").get();
m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
pGraphicsAnalysis->EndCapture();
}

winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> BackgroundBlur::RunAsync()
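The Completed handler in the Run hunk above captures the local outVideoFrame by reference, which is the most likely cause of the "still threading bug" TODO: the handler can fire after Run() has returned and the local has gone out of scope. A sketch of the usual fix, capturing the ref-counted WinRT handle by value so the handler owns what it touches (an assumption about intent, not part of this commit):

// Copying the VideoFrame into the lambda keeps the underlying frame alive
// until the completion handler has finished with it.
m_evalResult.Completed(
    [this, outVideoFrame](auto&& /*asyncInfo*/, winrt::Windows::Foundation::AsyncStatus /*status*/)
    {
        std::lock_guard<std::mutex> guard{ Processing };
        OutputDebugString(L"Eval Completed");
        m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
        m_inputVideoFrame.Close();
        m_outputVideoFrame.Close();
    });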
@@ -1,4 +1,6 @@
#pragma once
#define USE_PIX
#define DBG

#include <winrt/Microsoft.AI.MachineLearning.Experimental.h>
#include <winrt/Microsoft.AI.MachineLearning.h>
#include <Windows.AI.MachineLearning.native.h>
@@ -13,17 +15,25 @@
#include <mutex>
#include <winrt/windows.foundation.collections.h>
#include <winrt/Windows.Media.h>
//#include <DXProgrammableCapture.h>
#include "common.h"
#include <DXGItype.h>
#include <dxgi1_2.h>
#include <dxgi1_3.h>

using namespace winrt::Microsoft::AI::MachineLearning;
using namespace winrt::Microsoft::AI::MachineLearning::Experimental;
using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
using namespace winrt::Windows::Media;

// Model-agnostic helper LearningModels
LearningModel Normalize0_1ThenZScore(long height, long width, long channels, const std::array<float, 3>& means, const std::array<float, 3>& stddev);
LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w);
LearningModel Invert(long n, long c, long h, long w);

//winrt::com_ptr<IDXGraphicsAnalysis> pGraphicsAnalysis;

class IStreamModel
{
@@ -918,10 +918,12 @@ HRESULT TransformAsync::OnStartOfStream(void)
HRESULT hr = S_OK;
do
{
{
AutoLock lock(m_critSec);
m_dwStatus |= MYMFT_STATUS_STREAM_STARTED;
}

/*******************************
@@ -1031,6 +1033,7 @@ HRESULT TransformAsync::OnFlush(void)
HRESULT hr = S_OK;
do
{
AutoLock lock(m_critSec);
m_dwStatus &= (~MYMFT_STATUS_STREAM_STARTED);
@@ -1043,6 +1046,7 @@ HRESULT TransformAsync::OnFlush(void)
m_llCurrentSampleTime = 0; // Reset our sample time to 0 on a flush
} while (false);

return hr;
}
@@ -1,5 +1,4 @@
#pragma once

#include "common/CSampleQueue.h"
#include <Mfidl.h>
#include <mftransform.h>
@@ -13,6 +12,7 @@
#include <evr.h>
#include <mfobjects.h>

#include "common.h"
#include <initguid.h>
#include <uuids.h> // DirectShow GUIDs
#include <d3d9types.h>
@@ -24,6 +24,8 @@
using namespace MediaFoundationSamples;
#include "SegmentModel.h"

// TODO: Do we need the extension marker?
// {1F620607-A7FF-4B94-82F4-993F2E17B497}
DEFINE_GUID(TransformAsync_MFSampleExtension_Marker,
@@ -308,7 +310,7 @@ protected:
std::unique_ptr<IStreamModel> m_streamModel; // TODO: Keep a vector of stream models?
// TODO: Prob needs to be a vector so can dynamically allocate based on what numThreads ends up as.
std::vector<std::unique_ptr<IStreamModel>> m_models;
int m_numThreads = 2;
int m_numThreads =2;

// Pseudocode
// int numThreads; needs to be configured by constructor
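The pseudocode notes above want m_models sized from m_numThreads when the MFT is constructed. A minimal sketch of that wiring, assuming BackgroundBlur implements IStreamModel (the concrete type here is an assumption):

// Hypothetical constructor body: one IStreamModel instance per worker thread,
// so concurrent samples never share a session or binding.
m_models.reserve(m_numThreads);
for (int i = 0; i < m_numThreads; ++i)
{
    m_models.push_back(std::make_unique<BackgroundBlur>());
}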
@@ -4,6 +4,7 @@
#include <Mfapi.h>
#include "common/CHWMFT_DebugLogger.h"

//-------------------------------------------------------------------
// Name: GetStreamLimits
// Returns the minimum and maximum number of streams.
@@ -15,7 +16,6 @@ HRESULT TransformAsync::GetStreamLimits(
DWORD* pdwOutputMaximum
)
{
if ((pdwInputMinimum == NULL) ||
(pdwInputMaximum == NULL) ||
(pdwOutputMinimum == NULL) ||
@@ -1,4 +1,6 @@
#pragma once
#include <winrt/Windows.Foundation.h>
#include <pix3.h>

#define CHECK_HR(hr) if (FAILED(hr)) { goto done; }
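With USE_PIX defined and pix3.h pulled in from the WinPixEventRuntime package, CPU-side markers can make the capture timeline easier to navigate. A small sketch; the event names and colors are illustrative only:

// Scoped and paired CPU markers; they appear in the PIX timeline next to the GPU capture.
void ProcessFrame()
{
    PIXScopedEvent(PIX_COLOR(0, 128, 255), "BackgroundBlur::Run");

    PIXBeginEvent(PIX_COLOR_INDEX(1), "Preprocess");
    // ... z-score normalization session ...
    PIXEndEvent();
}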
@@ -3,4 +3,5 @@
<package id="Microsoft.AI.DirectML" version="1.8.0" targetFramework="native" />
<package id="Microsoft.AI.MachineLearning" version="1.10.0" targetFramework="native" />
<package id="Microsoft.Windows.CppWinRT" version="2.0.211028.7" targetFramework="native" />
<package id="WinPixEventRuntime" version="1.0.220124001" targetFramework="native" />
</packages>