Use the non-quantized fcn-resnet model; utilization goes way up

2022-02-16 12:28:21 -08:00 · 2022-02-16 12:28:21 -08:00 · c59da15362
--- a/.gitignore
+++ b/.gitignore
@ -290,6 +290,4 @@ __pycache__/

 # UWP Generated files
 **/Generated Files/
-/Samples/StyleTransfer/Assets
-/Samples/BackgroundBlur/BackgroundBlur/AsyncMFTWrapper.h
-/Samples/BackgroundBlur/BackgroundBlur/AsyncMFTWrapper.cpp
+/Samples/BackgroundBlur/BackgroundBlur/Assets
--- a/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj
+++ b/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj
@ -189,6 +189,11 @@
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
      <DestinationFolders Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)/Assets</DestinationFolders>
    </CopyFileToFolders>
+    <CopyFileToFolders Include="Assets\fcn-resnet50-11.onnx"> <!-- Non-quantized FCN-ResNet50 model deployed next to the executable. -->
+      <DeploymentContent>true</DeploymentContent>
+      <FileType>Document</FileType>
+      <DestinationFolders>$(OutDir)/Assets</DestinationFolders> <!-- Unconditional so every configuration copies into Assets, the folder GetModel() loads "Assets\fcn-resnet50-11.onnx" from. -->
+    </CopyFileToFolders>
    <None Include="packages.config" />
  </ItemGroup>
  <ItemGroup>
--- a/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj.filters
+++ b/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj.filters
@ -36,5 +36,6 @@
  <ItemGroup>
    <CopyFileToFolders Include="Assets\mosaic.onnx" />
    <CopyFileToFolders Include="Assets\fcn-resnet50-12-int8.onnx" />
+    <CopyFileToFolders Include="Assets\fcn-resnet50-11.onnx" />
  </ItemGroup>
 </Project>
--- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp
+++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp
@ -38,16 +38,7 @@ enum OnnxDataType : long {
 	ONNX_BFLOAT16 = 16,
 }OnnxDataType;

-interface DECLSPEC_UUID("9f251514-9d4d-4902-9d60-18988ab7d4b5") DECLSPEC_NOVTABLE
-	IDXGraphicsAnalysis : public IUnknown
-{

-	STDMETHOD_(void, BeginCapture)() PURE;
-
-	STDMETHOD_(void, EndCapture)() PURE;
-
-}; 
-IDXGraphicsAnalysis* pGraphicsAnalysis;


 // TODO: Probably don't need to be globals
@ -95,8 +86,6 @@ void BackgroundBlur::SetModels(int w, int h)
 	w /= g_scale; h /= g_scale;
 	SetImageSize(w, h);

-	HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast<void**>(&pGraphicsAnalysis));
-
 	m_sessionPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
 	m_sessionPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1));
 	// Named dim override of FCN-Resnet so that unlock optimizations of fixed input size
@ -118,12 +107,11 @@ void BackgroundBlur::SetModels(int w, int h)
 LearningModel BackgroundBlur::GetModel()
 {
 	auto rel = std::filesystem::current_path();
-	rel.append("Assets\\fcn-resnet50-12-int8.onnx");
+	rel.append("Assets\\fcn-resnet50-11.onnx");
 	return LearningModel::LoadFromFilePath(rel + L"");
 }
 void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
 {
-	pGraphicsAnalysis->BeginCapture();
 	assert(m_session.Device().AdapterId() == nvidia);
 	VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
 	VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
@ -167,7 +155,6 @@ void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
 	// TODO: Make this async as well, and add a completed 
 	m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"").get();
 	m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
-	pGraphicsAnalysis->EndCapture();
 }

 winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> BackgroundBlur::RunAsync()
--- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h
+++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h
@ -17,9 +17,7 @@
 #include <winrt/Windows.Media.h>
 //#include <DXProgrammableCapture.h>
 #include "common.h"
-#include <DXGItype.h>
-#include <dxgi1_2.h>
-#include <dxgi1_3.h>
+

 using namespace winrt::Microsoft::AI::MachineLearning;
 using namespace winrt::Microsoft::AI::MachineLearning::Experimental;
--- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h
+++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h
@ -310,7 +310,7 @@ protected:
    std::unique_ptr<IStreamModel> m_streamModel; // TODO: Keep a vector of stream models? 
    // TODO: Prob needs to be a vector so can dynamically allocate based on what numThreads ends up as.
    std::vector<std::unique_ptr<IStreamModel>> m_models; 
-    int m_numThreads =2;
+    int m_numThreads =5;

    // Pseudocode
    // int numThreads; needs to be configured by constructor
--- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp
+++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync_IMFTransform.cpp
@ -4,6 +4,21 @@
 #include <Mfapi.h>
 #include "common/CHWMFT_DebugLogger.h"

+#include <DXGItype.h>
+#include <dxgi1_2.h>
+#include <dxgi1_3.h>
+
+
+interface DECLSPEC_UUID("9f251514-9d4d-4902-9d60-18988ab7d4b5") DECLSPEC_NOVTABLE // Local declaration of the IDXGraphicsAnalysis COM interface; presumably mirrors <DXProgrammableCapture.h> - TODO confirm the IID against the SDK header.
+    IDXGraphicsAnalysis : public IUnknown
+{
+
+    STDMETHOD_(void, BeginCapture)() PURE; // Starts a capture; the only call site visible here (ProcessInput) is currently commented out.
+
+    STDMETHOD_(void, EndCapture)() PURE; // Ends a capture; the only call site visible here (ProcessOutput) is currently commented out.
+
+};
+IDXGraphicsAnalysis* pGraphicsAnalysis; // File-scope raw interface pointer, filled by DXGIGetDebugInterface1 on MFT_MESSAGE_NOTIFY_BEGIN_STREAMING. NOTE(review): that HRESULT is not checked, and the pointer presumably stays null when no graphics debugger is attached - null-check before re-enabling the capture calls.

 //-------------------------------------------------------------------
 // Name: GetStreamLimits
@ -683,8 +698,11 @@ HRESULT TransformAsync::ProcessMessage(

    // TODO: Old messages
    case MFT_MESSAGE_NOTIFY_BEGIN_STREAMING:
+    {
+        HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast<void**>(&pGraphicsAnalysis));
        SetupAlloc();
        break;
+    }
    case MFT_MESSAGE_NOTIFY_END_STREAMING:
    default:
        break;
@ -808,6 +826,7 @@ HRESULT TransformAsync::ProcessOutput(
        }
    }
 done:
+    //pGraphicsAnalysis->EndCapture();
    return hr;
 }

@ -818,6 +837,7 @@ HRESULT TransformAsync::ProcessInput(
 {
    HRESULT hr = S_OK;
    {
+        //pGraphicsAnalysis->BeginCapture();
        AutoLock lock(m_critSec);

        if (m_dwNeedInputCount == 0)