diff --git a/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj b/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj index a12f1657..12ca1b04 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj +++ b/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj @@ -144,7 +144,7 @@ true true true - Mfplat.lib;Mf.lib;Mfcore.lib;evr.lib;mfuuid.lib;powrprof.lib;d3d11.lib;mf.lib;mfplat.lib;shlwapi.lib;Comctl32.lib;%(AdditionalDependencies) + Mfplat.lib;Mf.lib;Mfcore.lib;evr.lib;mfuuid.lib;powrprof.lib;d3d11.lib;mf.lib;mfplat.lib;shlwapi.lib;Comctl32.lib;DXGI.lib;%(AdditionalDependencies) @@ -199,6 +199,7 @@ + @@ -210,6 +211,7 @@ + diff --git a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp index f6d747aa..c6da9a63 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp +++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp @@ -38,6 +38,18 @@ enum OnnxDataType : long { ONNX_BFLOAT16 = 16, }OnnxDataType; +interface DECLSPEC_UUID("9f251514-9d4d-4902-9d60-18988ab7d4b5") DECLSPEC_NOVTABLE + IDXGraphicsAnalysis : public IUnknown +{ + + STDMETHOD_(void, BeginCapture)() PURE; + + STDMETHOD_(void, EndCapture)() PURE; + +}; +IDXGraphicsAnalysis* pGraphicsAnalysis; + + // TODO: Probably don't need to be globals std::array mean = { 0.485f, 0.456f, 0.406f }; std::array stddev = { 0.229f, 0.224f, 0.225f }; @@ -83,6 +95,8 @@ void BackgroundBlur::SetModels(int w, int h) w /= g_scale; h /= g_scale; SetImageSize(w, h); + HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast(&pGraphicsAnalysis)); + m_sessionPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev)); m_sessionPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1)); // Named dim override of FCN-Resnet so that unlock optimizations of fixed input size @@ -109,28 +123,51 @@ LearningModel BackgroundBlur::GetModel() } void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest) { - // TODO: Lock so only one call to each IStreamModel at a time? - if (m_evalResult == nullptr || m_evalResult.Status() != Windows::Foundation::AsyncStatus::Started) { - VideoFrame outVideoFrame=NULL; - { - std::lock_guard guard{ Processing }; - VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src); - outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest); - SetVideoFrames(inVideoFrame, outVideoFrame); - m_evalResult = RunAsync(); + pGraphicsAnalysis->BeginCapture(); + assert(m_session.Device().AdapterId() == nvidia); + VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src); + VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest); + SetVideoFrames(inVideoFrame, outVideoFrame); - } - - m_evalResult.Completed([&](auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) { - // Ensure only one call to copy out at a time - std::lock_guard guard{ Processing }; - OutputDebugString(L"Eval Completed"); - // StyleTransferEffect copies to a member outputCache video frame and then copies to output outside of first condition - m_outputVideoFrame.CopyToAsync(outVideoFrame).get(); // TODO: Still threading bug here methinks - m_inputVideoFrame.Close(); - m_outputVideoFrame.Close(); - }); - } + // Shape validation + assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels); + assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels); + + assert(m_sessionPreprocess.Device().AdapterId() == nvidia); + assert(m_sessionPostprocess.Device().AdapterId() == nvidia); + + // 2. Preprocessing: z-score normalization + std::vector shape = { 1, 3, m_imageHeightInPixels, m_imageWidthInPixels }; + ITensor intermediateTensor = TensorFloat::Create(shape); + hstring inputName = m_sessionPreprocess.Model().InputFeatures().GetAt(0).Name(); + hstring outputName = m_sessionPreprocess.Model().OutputFeatures().GetAt(0).Name(); + + m_bindingPreprocess.Bind(inputName, m_inputVideoFrame); + outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true)); + m_bindingPreprocess.Bind(outputName, intermediateTensor, outputBindProperties); + m_sessionPreprocess.EvaluateAsync(m_bindingPreprocess, L""); + + // 3. Run through actual model + std::vector FCNResnetOutputShape = { 1, 21, m_imageHeightInPixels, m_imageWidthInPixels }; + ITensor FCNResnetOutput = TensorFloat::Create(FCNResnetOutputShape); + + m_binding.Bind(m_session.Model().InputFeatures().GetAt(0).Name(), intermediateTensor); + m_binding.Bind(m_session.Model().OutputFeatures().GetAt(0).Name(), FCNResnetOutput, outputBindProperties); + m_session.EvaluateAsync(m_binding, L""); + + // Shape validation + assert(m_outputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels); + assert(m_outputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels); + + // 4. Postprocessing + outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(false)); + m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(0).Name(), m_inputVideoFrame); // InputImage + m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores + m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame); + // TODO: Make this async as well, and add a completed + m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"").get(); + m_outputVideoFrame.CopyToAsync(outVideoFrame).get(); + pGraphicsAnalysis->EndCapture(); } winrt::Windows::Foundation::IAsyncOperation BackgroundBlur::RunAsync() diff --git a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h index abcf95ce..b4901e6d 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h +++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h @@ -1,4 +1,6 @@ -#pragma once +#define USE_PIX +#define DBG + #include #include #include @@ -13,17 +15,25 @@ #include #include #include +//#include +#include "common.h" +#include +#include +#include using namespace winrt::Microsoft::AI::MachineLearning; using namespace winrt::Microsoft::AI::MachineLearning::Experimental; using namespace winrt::Windows::Graphics::DirectX::Direct3D11; using namespace winrt::Windows::Media; + + // Model-agnostic helper LearningModels LearningModel Normalize0_1ThenZScore(long height, long width, long channels, const std::array& means, const std::array& stddev); LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w); LearningModel Invert(long n, long c, long h, long w); +//winrt::com_ptr pGraphicsAnalysis; class IStreamModel { diff --git a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp index 94fadf3b..65c5714b 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp +++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp @@ -918,10 +918,12 @@ HRESULT TransformAsync::OnStartOfStream(void) HRESULT hr = S_OK; do { + { AutoLock lock(m_critSec); m_dwStatus |= MYMFT_STATUS_STREAM_STARTED; + } /******************************* @@ -1031,6 +1033,7 @@ HRESULT TransformAsync::OnFlush(void) HRESULT hr = S_OK; do { + AutoLock lock(m_critSec); m_dwStatus &= (~MYMFT_STATUS_STREAM_STARTED); @@ -1043,6 +1046,7 @@ HRESULT TransformAsync::OnFlush(void) m_llCurrentSampleTime = 0; // Reset our sample time to 0 on a flush } while (false); + return hr; } diff --git a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h index 333b4a95..32180e20 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h +++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h @@ -1,5 +1,4 @@ #pragma once - #include "common/CSampleQueue.h" #include #include @@ -13,6 +12,7 @@ #include #include +#include "common.h" #include #include // DirectShow GUIDs #include @@ -24,6 +24,8 @@ using namespace MediaFoundationSamples; #include "SegmentModel.h" + + // TODO: Do we need the extension marker? // {1F620607-A7FF-4B94-82F4-993F2E17B497} DEFINE_GUID(TransformAsync_MFSampleExtension_Marker, @@ -308,7 +310,7 @@ protected: std::unique_ptr m_streamModel; // TODO: Keep a vector of stream models? // TODO: Prob needs to be a vector so can dynamically allocate based on what numThreads ends up as. std::vector> m_models; - int m_numThreads = 2; + int m_numThreads =2; // Pseudocode // int numThreads; needs to be configured by constructor diff --git a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp index 3cef3ff4..2da9a318 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp +++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp @@ -4,6 +4,7 @@ #include #include "common/CHWMFT_DebugLogger.h" + //------------------------------------------------------------------- // Name: GetStreamLimits // Returns the minimum and maximum number of streams. @@ -15,7 +16,6 @@ HRESULT TransformAsync::GetStreamLimits( DWORD* pdwOutputMaximum ) { - if ((pdwInputMinimum == NULL) || (pdwInputMaximum == NULL) || (pdwOutputMinimum == NULL) || diff --git a/Samples/BackgroundBlur/BackgroundBlur/common.h b/Samples/BackgroundBlur/BackgroundBlur/common.h index 35eef332..5eb3a53d 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/common.h +++ b/Samples/BackgroundBlur/BackgroundBlur/common.h @@ -1,4 +1,6 @@ #pragma once #include +#include #define CHECK_HR(hr) if (FAILED(hr)) { goto done; } + diff --git a/Samples/BackgroundBlur/BackgroundBlur/packages.config b/Samples/BackgroundBlur/BackgroundBlur/packages.config index 1ea00c67..81849649 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/packages.config +++ b/Samples/BackgroundBlur/BackgroundBlur/packages.config @@ -3,4 +3,5 @@ + \ No newline at end of file