diff --git a/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj b/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj
index a12f1657..12ca1b04 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj
+++ b/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj
@@ -144,7 +144,7 @@
true
true
true
- Mfplat.lib;Mf.lib;Mfcore.lib;evr.lib;mfuuid.lib;powrprof.lib;d3d11.lib;mf.lib;mfplat.lib;shlwapi.lib;Comctl32.lib;%(AdditionalDependencies)
+ Mfplat.lib;Mf.lib;Mfcore.lib;evr.lib;mfuuid.lib;powrprof.lib;d3d11.lib;mf.lib;mfplat.lib;shlwapi.lib;Comctl32.lib;DXGI.lib;%(AdditionalDependencies)
@@ -199,6 +199,7 @@
+
@@ -210,6 +211,7 @@
+
diff --git a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp
index f6d747aa..c6da9a63 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp
+++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp
@@ -38,6 +38,18 @@ enum OnnxDataType : long {
ONNX_BFLOAT16 = 16,
}OnnxDataType;
+interface DECLSPEC_UUID("9f251514-9d4d-4902-9d60-18988ab7d4b5") DECLSPEC_NOVTABLE
+ IDXGraphicsAnalysis : public IUnknown
+{
+ // Local declaration of the interface that SetModels requests through DXGIGetDebugInterface1.
+ STDMETHOD_(void, BeginCapture)() PURE;
+ // NOTE(review): declaration order fixes the vtable slots; presumably it must match the SDK's own declaration - confirm.
+ STDMETHOD_(void, EndCapture)() PURE;
+ // BackgroundBlur::Run brackets its per-frame work with BeginCapture / EndCapture.
+};
+IDXGraphicsAnalysis* pGraphicsAnalysis; // File-scope pointer; SetModels passes its address to DXGIGetDebugInterface1.
+
+
// TODO: Probably don't need to be globals
std::array mean = { 0.485f, 0.456f, 0.406f };
std::array stddev = { 0.229f, 0.224f, 0.225f };
@@ -83,6 +95,8 @@ void BackgroundBlur::SetModels(int w, int h)
w /= g_scale; h /= g_scale;
SetImageSize(w, h);
+ HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast(&pGraphicsAnalysis));
+
m_sessionPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
m_sessionPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1));
// Named dim override of FCN-Resnet so that unlock optimizations of fixed input size
@@ -109,28 +123,51 @@ LearningModel BackgroundBlur::GetModel()
}
void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
{
- // TODO: Lock so only one call to each IStreamModel at a time?
- if (m_evalResult == nullptr || m_evalResult.Status() != Windows::Foundation::AsyncStatus::Started) {
- VideoFrame outVideoFrame=NULL;
- {
- std::lock_guard guard{ Processing };
- VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
- outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
- SetVideoFrames(inVideoFrame, outVideoFrame);
- m_evalResult = RunAsync();
+ if (pGraphicsAnalysis != nullptr) { pGraphicsAnalysis->BeginCapture(); } // Capture is optional: the pointer stays null unless SetModels obtained the interface.
+ assert(m_session.Device().AdapterId() == nvidia);
+ VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
+ VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
+ SetVideoFrames(inVideoFrame, outVideoFrame);
- }
-
- m_evalResult.Completed([&](auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) {
- // Ensure only one call to copy out at a time
- std::lock_guard guard{ Processing };
- OutputDebugString(L"Eval Completed");
- // StyleTransferEffect copies to a member outputCache video frame and then copies to output outside of first condition
- m_outputVideoFrame.CopyToAsync(outVideoFrame).get(); // TODO: Still threading bug here methinks
- m_inputVideoFrame.Close();
- m_outputVideoFrame.Close();
- });
- }
+ // Shape validation
+ assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
+ assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
+
+ assert(m_sessionPreprocess.Device().AdapterId() == nvidia);
+ assert(m_sessionPostprocess.Device().AdapterId() == nvidia);
+
+ // 2. Preprocessing: z-score normalization
+ std::vector shape = { 1, 3, m_imageHeightInPixels, m_imageWidthInPixels };
+ ITensor intermediateTensor = TensorFloat::Create(shape);
+ hstring inputName = m_sessionPreprocess.Model().InputFeatures().GetAt(0).Name();
+ hstring outputName = m_sessionPreprocess.Model().OutputFeatures().GetAt(0).Name();
+
+ m_bindingPreprocess.Bind(inputName, m_inputVideoFrame);
+ outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
+ m_bindingPreprocess.Bind(outputName, intermediateTensor, outputBindProperties);
+ m_sessionPreprocess.EvaluateAsync(m_bindingPreprocess, L""); // Not awaited here; only the final postprocess evaluation below is waited on with .get().
+
+ // 3. Run through actual model
+ std::vector FCNResnetOutputShape = { 1, 21, m_imageHeightInPixels, m_imageWidthInPixels };
+ ITensor FCNResnetOutput = TensorFloat::Create(FCNResnetOutputShape);
+
+ m_binding.Bind(m_session.Model().InputFeatures().GetAt(0).Name(), intermediateTensor);
+ m_binding.Bind(m_session.Model().OutputFeatures().GetAt(0).Name(), FCNResnetOutput, outputBindProperties);
+ m_session.EvaluateAsync(m_binding, L""); // Not awaited here either; its output tensor feeds the postprocess binding.
+
+ // Shape validation
+ assert(m_outputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
+ assert(m_outputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
+
+ // 4. Postprocessing
+ outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(false)); // NOTE(review): no Bind call below receives this property set - confirm it is needed.
+ m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(0).Name(), m_inputVideoFrame); // InputImage
+ m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
+ m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame);
+ // TODO: Make this async as well, and add a completed
+ m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"").get();
+ m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
+ if (pGraphicsAnalysis != nullptr) { pGraphicsAnalysis->EndCapture(); } // Mirrors the guarded BeginCapture at the top of this function.
}
winrt::Windows::Foundation::IAsyncOperation BackgroundBlur::RunAsync()
diff --git a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h
index abcf95ce..b4901e6d 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h
+++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h
@@ -1,4 +1,6 @@
-#pragma once
+#pragma once // Keep the include guard: this header is included from TransformAsync.h.
+#define USE_PIX // NOTE(review): not referenced in the visible changes - confirm it is still needed.
+#define DBG // NOTE(review): not referenced in the visible changes - confirm it is still needed.
#include
#include
#include
@@ -13,17 +15,25 @@
#include
#include
#include
+//#include
+#include "common.h"
+#include
+#include
+#include
using namespace winrt::Microsoft::AI::MachineLearning;
using namespace winrt::Microsoft::AI::MachineLearning::Experimental;
using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
using namespace winrt::Windows::Media;
+
+
// Model-agnostic helper LearningModels
LearningModel Normalize0_1ThenZScore(long height, long width, long channels, const std::array& means, const std::array& stddev);
LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w);
LearningModel Invert(long n, long c, long h, long w);
+//winrt::com_ptr pGraphicsAnalysis;
class IStreamModel
{
diff --git a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp
index 94fadf3b..65c5714b 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp
+++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp
@@ -918,10 +918,12 @@ HRESULT TransformAsync::OnStartOfStream(void)
HRESULT hr = S_OK;
do
{
+
{
AutoLock lock(m_critSec);
m_dwStatus |= MYMFT_STATUS_STREAM_STARTED;
+
}
/*******************************
@@ -1031,6 +1033,7 @@ HRESULT TransformAsync::OnFlush(void)
HRESULT hr = S_OK;
do
{
+
AutoLock lock(m_critSec);
m_dwStatus &= (~MYMFT_STATUS_STREAM_STARTED);
@@ -1043,6 +1046,7 @@ HRESULT TransformAsync::OnFlush(void)
m_llCurrentSampleTime = 0; // Reset our sample time to 0 on a flush
} while (false);
+
return hr;
}
diff --git a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h
index 333b4a95..32180e20 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h
+++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h
@@ -1,5 +1,4 @@
#pragma once
-
#include "common/CSampleQueue.h"
#include
#include
@@ -13,6 +12,7 @@
#include
#include
+#include "common.h"
#include
#include // DirectShow GUIDs
#include
@@ -24,6 +24,8 @@
using namespace MediaFoundationSamples;
#include "SegmentModel.h"
+
+
// TODO: Do we need the extension marker?
// {1F620607-A7FF-4B94-82F4-993F2E17B497}
DEFINE_GUID(TransformAsync_MFSampleExtension_Marker,
@@ -308,7 +310,7 @@ protected:
std::unique_ptr m_streamModel; // TODO: Keep a vector of stream models?
// TODO: Prob needs to be a vector so can dynamically allocate based on what numThreads ends up as.
std::vector> m_models;
- int m_numThreads = 2;
+ int m_numThreads =2;
// Pseudocode
// int numThreads; needs to be configured by constructor
diff --git a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp
index 3cef3ff4..2da9a318 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp
+++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp
@@ -4,6 +4,7 @@
#include
#include "common/CHWMFT_DebugLogger.h"
+
//-------------------------------------------------------------------
// Name: GetStreamLimits
// Returns the minimum and maximum number of streams.
@@ -15,7 +16,6 @@ HRESULT TransformAsync::GetStreamLimits(
DWORD* pdwOutputMaximum
)
{
-
if ((pdwInputMinimum == NULL) ||
(pdwInputMaximum == NULL) ||
(pdwOutputMinimum == NULL) ||
diff --git a/Samples/BackgroundBlur/BackgroundBlur/common.h b/Samples/BackgroundBlur/BackgroundBlur/common.h
index 35eef332..5eb3a53d 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/common.h
+++ b/Samples/BackgroundBlur/BackgroundBlur/common.h
@@ -1,4 +1,6 @@
#pragma once
#include
+#include
#define CHECK_HR(hr) if (FAILED(hr)) { goto done; }
+
diff --git a/Samples/BackgroundBlur/BackgroundBlur/packages.config b/Samples/BackgroundBlur/BackgroundBlur/packages.config
index 1ea00c67..81849649 100644
--- a/Samples/BackgroundBlur/BackgroundBlur/packages.config
+++ b/Samples/BackgroundBlur/BackgroundBlur/packages.config
@@ -3,4 +3,5 @@
+
\ No newline at end of file