Clean up IStreamModel

2022-03-11 15:15:02 -08:00 · 2022-03-11 15:15:02 -08:00 · 9efe50109e
--- a/.gitignore
+++ b/.gitignore
@ -291,3 +291,6 @@ __pycache__/
 # UWP Generated files
 **/Generated Files/
 /Samples/BackgroundBlur/BackgroundBlur/Assets
+/Samples/StyleTransfer/Assets
+/Samples/CustomTensorization/CustomTensorization/output_gpu.png
+/Samples/CustomTensorization/CustomTensorization/output_cpu.png
--- a/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj
+++ b/Samples/BackgroundBlur/BackgroundBlur/BackgroundBlur.vcxproj
@ -26,33 +26,33 @@
    <Keyword>Win32Proj</Keyword>
    <ProjectGuid>{7519dde1-9348-4054-81aa-0456dac71cbd}</ProjectGuid>
    <RootNamespace>WindowsProject1</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
    <ProjectName>BackgroundBlur</ProjectName>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
--- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp
+++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp
@ -39,8 +39,6 @@ enum OnnxDataType : long {


 int g_scale = 5;
-std::array<float, 3> mean = { 0.485f, 0.456f, 0.406f };
-std::array<float, 3> stddev = { 0.229f, 0.224f, 0.225f };
 auto outputBindProperties = PropertySet();

 /****	Style transfer model	****/
@ -73,14 +71,8 @@ void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest)
 	m_outputVideoFrame.CopyToAsync(outVideoFrame).get();

 	m_bSyncStarted = FALSE;
-	m_canRunEval.notify_one();
-}
-VideoFrame StyleTransfer::RunAsync(IDirect3DSurface src, IDirect3DSurface dest)
-{
-	// TODO: Implement async StyleTransfer
-	//m_evalStatus = NULL;
-	return NULL;
 }
+
 LearningModel StyleTransfer::GetModel()
 {
 	auto rel = std::filesystem::current_path();
@ -89,37 +81,22 @@ LearningModel StyleTransfer::GetModel()
 }

 BackgroundBlur::~BackgroundBlur() {
-	if (m_sessionFused) m_sessionFused.Close();
-	if (m_sessionPostprocess) m_sessionPostprocess.Close();
-	if (m_sessionPreprocess) m_sessionPreprocess.Close();
-	//if (m_bindFused) m_bindFused.Clear();
+	if (m_session) m_session.Close();
 }
+
 /****	Background blur model	****/
 void BackgroundBlur::SetModels(int w, int h)
 {
 	w /= g_scale; h /= g_scale;
 	SetImageSize(w, h);

-	m_sessionPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
-	m_sessionPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1));
-	// Named dim override of FCN-Resnet so that unlock optimizations of fixed input size
-	auto fcnDevice = m_bUseGPU ? LearningModelDevice(LearningModelDeviceKind::DirectXHighPerformance) : LearningModelDevice(LearningModelDeviceKind::Default); // Todo: Have a toggle between GPU/ CPU? 
-	auto model = GetModel();
-	auto FCNInputName = model.OutputFeatures().GetAt(0).Name();
-	auto options = LearningModelSessionOptions();
-	options.BatchSizeOverride(0);
-	options.CloseModelOnSessionCreation(true);
-	options.OverrideNamedDimension(L"height", m_imageHeightInPixels);
-	options.OverrideNamedDimension(L"width", m_imageWidthInPixels);
-	m_session = LearningModelSession(model, fcnDevice, options);
-
 	auto joinOptions1 = LearningModelJoinOptions();
 	joinOptions1.CloseModelOnJoin(true);
 	joinOptions1.Link(L"Output", L"input");
 	joinOptions1.JoinedNodePrefix(L"FCN_");
 	joinOptions1.PromoteUnlinkedOutputsToFusedOutputs(true);
-	auto modelExperimental = LearningModelExperimental(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
-	LearningModel stageOne = modelExperimental.JoinModel(GetModel(), joinOptions1);
+	auto modelExperimental1 = LearningModelExperimental(Normalize0_1ThenZScore(h, w, 3, m_mean, m_stddev));
+	LearningModel intermediateModel = modelExperimental1.JoinModel(GetModel(), joinOptions1);

 	auto joinOptions2 = LearningModelJoinOptions();
 	joinOptions2.CloseModelOnJoin(true);
@ -127,17 +104,14 @@ void BackgroundBlur::SetModels(int w, int h)
 	joinOptions2.Link(L"OutputImageForward", L"InputImage");
 	joinOptions2.JoinedNodePrefix(L"Post_");
 	//joinOptions2.PromoteUnlinkedOutputsToFusedOutputs(false); // Causes winrt originate error in FusedGraphKernel.cpp, but works on CPU
-	auto modelTwo = LearningModelExperimental(stageOne);
-	LearningModel modelFused = modelTwo.JoinModel(PostProcess(1, 3, h, w, 1), joinOptions2);
+	auto modelExperimental2 = LearningModelExperimental(intermediateModel);
+	LearningModel modelFused = modelExperimental2.JoinModel(PostProcess(1, 3, h, w, 1), joinOptions2);

-	modelTwo.Save(L"modelTwo.onnx");
+	// Save the model for debugging purposes
+	//modelExperimental2.Save(L"modelFused.onnx");

-	m_sessionFused = CreateLearningModelSession(modelFused);
-	m_bindFused = LearningModelBinding(m_sessionFused);
-
-	m_bindingPreprocess = LearningModelBinding(m_sessionPreprocess);
+	m_session = CreateLearningModelSession(modelFused);
 	m_binding = LearningModelBinding(m_session);
-	m_bindingPostprocess = LearningModelBinding(m_sessionPostprocess);
 }
 LearningModel BackgroundBlur::GetModel()
 {
@ -149,6 +123,7 @@ LearningModel BackgroundBlur::GetModel()
 void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
 {
 	m_bSyncStarted = TRUE;
+	// Device validation
 	assert(m_session.Device().AdapterId() == m_highPerfAdapter);
 	VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
 	VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
@ -158,117 +133,14 @@ void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
 	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
 	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);

-	hstring inputName = m_sessionFused.Model().InputFeatures().GetAt(0).Name();
-	hstring outputName = m_sessionFused.Model().OutputFeatures().GetAt(1).Name();
+	hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
+	hstring outputName = m_session.Model().OutputFeatures().GetAt(1).Name();

-	m_bindFused.Bind(inputName, m_inputVideoFrame);
-	m_bindFused.Bind(outputName, m_outputVideoFrame);
-	auto results = m_sessionFused.Evaluate(m_bindFused, L"");
+	m_binding.Bind(inputName, m_inputVideoFrame);
+	m_binding.Bind(outputName, m_outputVideoFrame);
+	auto results = m_session.Evaluate(m_binding, L"");
 	m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
 	m_bSyncStarted = FALSE;
-	m_canRunEval.notify_one();
-}
-
-//void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
-//{
-//	m_bSyncStarted = TRUE;
-//	assert(m_session.Device().AdapterId() == m_highPerfAdapter);
-//	VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
-//	VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
-//	SetVideoFrames(inVideoFrame, outVideoFrame);
-//
-//	// Shape validation
-//	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
-//	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
-//
-//	assert(m_sessionPreprocess.Device().AdapterId() == m_highPerfAdapter);
-//	assert(m_sessionPostprocess.Device().AdapterId() == m_highPerfAdapter);
-//
-//	// 2. Preprocessing: z-score normalization 
-//	std::vector<int64_t> shape = { 1, 3, m_imageHeightInPixels, m_imageWidthInPixels };
-//	ITensor intermediateTensor = TensorFloat::Create(shape);
-//	hstring inputName = m_sessionPreprocess.Model().InputFeatures().GetAt(0).Name();
-//	hstring outputName = m_sessionPreprocess.Model().OutputFeatures().GetAt(0).Name();
-//
-//	m_bindingPreprocess.Bind(inputName, m_inputVideoFrame);
-//	outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
-//	m_bindingPreprocess.Bind(outputName, intermediateTensor, outputBindProperties);
-//	m_sessionPreprocess.EvaluateAsync(m_bindingPreprocess, L"");
-//
-//	// 3. Run through actual model
-//	std::vector<int64_t> FCNResnetOutputShape = { 1, 21, m_imageHeightInPixels, m_imageWidthInPixels };
-//	ITensor FCNResnetOutput = TensorFloat::Create(FCNResnetOutputShape);
-//
-//	m_binding.Bind(m_session.Model().InputFeatures().GetAt(0).Name(), intermediateTensor);
-//	m_binding.Bind(m_session.Model().OutputFeatures().GetAt(0).Name(), FCNResnetOutput, outputBindProperties);
-//	m_session.EvaluateAsync(m_binding, L"");
-//
-//	// Shape validation 
-//	assert(m_outputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
-//	assert(m_outputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
-//
-//	// 4. Postprocessing
-//	outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(false));
-//	m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(0).Name(), m_inputVideoFrame); // InputImage
-//	m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
-//	m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame);
-//	// TODO: Make this async as well, and add a completed 
-//	m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"").get();
-//	m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
-//	m_bSyncStarted = FALSE;
-//	m_canRunEval.notify_one();
-//}
-
-VideoFrame BackgroundBlur::RunAsync(IDirect3DSurface src, IDirect3DSurface dest)
-{
-	assert(m_session.Device().AdapterId() == m_highPerfAdapter);
-	VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
-	VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
-	SetVideoFrames(inVideoFrame, outVideoFrame);
-
-	// Shape validation
-	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
-	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
-
-	assert(m_sessionPreprocess.Device().AdapterId() == m_highPerfAdapter);
-	assert(m_sessionPostprocess.Device().AdapterId() == m_highPerfAdapter);
-
-	// 2. Preprocessing: z-score normalization 
-	std::vector<int64_t> shape = { 1, 3, m_imageHeightInPixels, m_imageWidthInPixels };
-	ITensor intermediateTensor = TensorFloat::Create(shape);
-	hstring inputName = m_sessionPreprocess.Model().InputFeatures().GetAt(0).Name();
-	hstring outputName = m_sessionPreprocess.Model().OutputFeatures().GetAt(0).Name();
-
-	m_bindingPreprocess.Bind(inputName, m_inputVideoFrame);
-	outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
-	m_bindingPreprocess.Bind(outputName, intermediateTensor, outputBindProperties);
-	m_sessionPreprocess.EvaluateAsync(m_bindingPreprocess, L"");
-
-	// 3. Run through actual model
-	std::vector<int64_t> FCNResnetOutputShape = { 1, 21, m_imageHeightInPixels, m_imageWidthInPixels };
-	ITensor FCNResnetOutput = TensorFloat::Create(FCNResnetOutputShape);
-
-	m_binding.Bind(m_session.Model().InputFeatures().GetAt(0).Name(), intermediateTensor);
-	m_binding.Bind(m_session.Model().OutputFeatures().GetAt(0).Name(), FCNResnetOutput, outputBindProperties);
-	m_session.EvaluateAsync(m_binding, L"");
-
-	// Shape validation 
-	assert(m_outputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
-	assert(m_outputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
-
-	// 4. Postprocessing
-	outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(false));
-	m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(0).Name(), m_inputVideoFrame); // InputImage
-	m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
-	m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame);
-	m_evalStatus = m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"");
-	// m_evalStatus = m_outputVideoFrame.CopyToAsync(outVideoFrame);
-	/*auto makeOutput = [&outVideoFrame]() -> winrt::Windows::Foundation::IAsyncOperation<VideoFrame> { co_return outVideoFrame; };
-	m_evalStatus = makeOutput();*/
-
-	// todo: go back to have this return AsyncStatus for when done with copytoasync? 
-	m_outputVideoFrame.CopyToAsync(outVideoFrame);
-	return outVideoFrame;
 }

 LearningModel BackgroundBlur::PostProcess(long n, long c, long h, long w, long axis)
--- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h
+++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.h
@ -53,9 +53,9 @@ public:
 		if (m_outputVideoFrame) m_outputVideoFrame.Close();
 		if (m_device) m_device.Close();
 	};
+
 	virtual void SetModels(int w, int h) =0;
 	virtual void Run(IDirect3DSurface src, IDirect3DSurface dest) =0;
-	virtual VideoFrame RunAsync(IDirect3DSurface src, IDirect3DSurface dest) = 0;

 	void SetUseGPU(bool use) { 
 		m_bUseGPU = use;
@ -66,16 +66,13 @@ public:
 		m_device = m_session.Device().Direct3D11Device();
 		auto device = m_session.Device().AdapterId();
 	}
-	winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> m_evalStatus;
-	BOOL m_bSyncStarted; // TODO: Construct an IStreamModel as sync/async, then have a GetStatus to query this or m_evalStatus
+	
+	// Synchronous eval status
+	BOOL m_bSyncStarted; 
 	VideoFrame m_outputVideoFrame;
-	std::condition_variable m_canRunEval;
-	std::mutex Processing;
-	std::mutex m_runMutex;

 protected:
-	winrt::Windows::Graphics::DisplayAdapterId m_highPerfAdapter{};
-
+	// Cache input frames into a shareable d3d-backed VideoFrame
 	void SetVideoFrames(VideoFrame inVideoFrame, VideoFrame outVideoFrame) 
 	{
 		if (true || !m_bVideoFramesSet)
@ -123,23 +120,25 @@ protected:
 	UINT32                      m_imageWidthInPixels;
 	UINT32                      m_imageHeightInPixels;
 	IDirect3DDevice				m_device;
+	// For debugging potential device issues
+	winrt::Windows::Graphics::DisplayAdapterId m_highPerfAdapter{};

 	// Learning Model Binding and Session. 
 	LearningModelSession m_session;
 	LearningModelBinding m_binding;
-
 }; 


 class StyleTransfer : public IStreamModel {
 public:
-	StyleTransfer(int w, int h) : IStreamModel(w, h) {
-		SetModels(w, h); }
+	StyleTransfer(int w, int h) : IStreamModel(w, h) 
+	{
+		SetModels(w, h); 
+	}
 	StyleTransfer() : IStreamModel() {};
 	~StyleTransfer(){};
 	void SetModels(int w, int h);
 	void Run(IDirect3DSurface src, IDirect3DSurface dest);
-	VideoFrame RunAsync(IDirect3DSurface src, IDirect3DSurface dest);
 private: 
 	LearningModel GetModel();
 };
@ -149,43 +148,24 @@ class BackgroundBlur : public IStreamModel
 {
 public:
 	BackgroundBlur(int w, int h) : 
-		IStreamModel(w, h), 
-		m_sessionPreprocess(NULL),
-		m_sessionPostprocess(NULL),
-		m_bindingPreprocess(NULL),
-		m_bindingPostprocess(NULL), 
-		m_sessionFused(NULL),
-		m_bindFused(NULL)
+		IStreamModel(w, h)
 	{
 		SetModels(w, h);
 	}
 	BackgroundBlur() : 
-		IStreamModel(),
-		m_sessionPreprocess(NULL),
-		m_sessionPostprocess(NULL),
-		m_bindingPreprocess(NULL),
-		m_bindingPostprocess(NULL),
-		m_sessionFused(NULL),
-		m_bindFused(NULL)
+		IStreamModel()
 	{};
 	~BackgroundBlur();
 	void SetModels(int w, int h);
 	void Run(IDirect3DSurface src, IDirect3DSurface dest);
-	VideoFrame RunAsync(IDirect3DSurface src, IDirect3DSurface dest);

 private:
 	LearningModel GetModel();
 	LearningModel PostProcess(long n, long c, long h, long w, long axis);
+	
+	// Mean and standard deviation for z-score normalization during preprocessing. 
+	std::array<float, 3> m_mean = { 0.485f, 0.456f, 0.406f };
+	std::array<float, 3> m_stddev = { 0.229f, 0.224f, 0.225f };

-	std::mutex Processing; // Ensure only one access to a BB model at a time? 

-	// Trying to used a fused learningmodelexperimental 
-	LearningModelSession m_sessionFused; 
-	LearningModelBinding m_bindFused;
-
-	// Background blur-specific sessions, bindings 
-	LearningModelSession m_sessionPreprocess; 
-	LearningModelSession m_sessionPostprocess; 
-	LearningModelBinding m_bindingPreprocess;
-	LearningModelBinding m_bindingPostprocess; 
 };
--- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp
+++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.cpp
@ -208,7 +208,6 @@ HRESULT TransformAsync::SubmitEval(IMFSample* pInput)
    DWORD dwCurrentSample = InterlockedIncrement(&m_ulSampleCounter);
    swapChainEntry = (++swapChainEntry) % m_numThreads;
    auto model = m_models[swapChainEntry].get();
-    std::unique_lock<std::mutex> lock(model->m_runMutex);

    //pInputSample attributes to copy over to pOutputSample
    LONGLONG hnsDuration = 0;