Background blur is running but there seems to be an odd flicker, maybe from not clearing bindings?
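One flicker hypothesis: a SwapChainEntry's bindings are reused across frames without being cleared, so a stale input/output resource can be presented. A minimal sketch of clearing them between evaluations (hypothetical helper, not part of this commit; LearningModelBinding::Clear() is the same call RunTestDXGI already uses):

// Hypothetical helper: clear a SwapChainEntry's bindings before the entry is reused,
// so no stale resources linger from a previous frame.
void ClearEntryBindings(SwapChainEntry& entry)
{
    if (entry.bind_pre)      entry.bind_pre.Clear();
    if (entry.binding_model) entry.binding_model.Clear();
    if (entry.binding_post)  entry.binding_post.Clear();
    // entry.outputCache is kept intact so the last finished frame can still be copied out.
}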

Linnea May 2022-02-08 10:33:45 -08:00
Parent 1de36a35d3
Commit 1d9d216238
3 changed files with 127 additions and 437 deletions

View file

@@ -43,11 +43,11 @@ std::array<float, 3> mean = { 0.485f, 0.456f, 0.406f };
std::array<float, 3> stddev = { 0.229f, 0.224f, 0.225f };
auto outputBindProperties = PropertySet();
/**** Style transfer model ****/
void StyleTransfer::SetModels(int w, int h)
{
SetImageSize(w, h);
// TODO: Use w/h or the fixed 720x720 of the model
SetImageSize(720, 720); // Style transfer model input size is fixed to 720x720
m_session = CreateLearningModelSession(GetModel());
m_binding = LearningModelBinding(m_session);
}
@@ -74,316 +74,82 @@ LearningModel StyleTransfer::GetModel()
return LearningModel::LoadFromFilePath(rel + L"");
}
/******* Start of old Segment Model stuff *******/
SegmentModel::SegmentModel() :
m_sess(NULL),
m_sessPreprocess(NULL),
m_sessFCN(NULL),
m_sessPostprocess(NULL),
m_sessStyleTransfer(NULL),
m_useGPU(true),
m_bindPreprocess(NULL),
m_bindFCN(NULL),
m_bindPostprocess(NULL),
m_bindStyleTransfer(NULL),
bindings(swapChainEntryCount)
{
}
SegmentModel::SegmentModel(UINT32 w, UINT32 h) :
m_sess(NULL),
m_sessPreprocess(NULL),
m_sessFCN(NULL),
m_sessPostprocess(NULL),
m_sessStyleTransfer(NULL),
m_useGPU(true),
m_bindPreprocess(NULL),
m_bindFCN(NULL),
m_bindPostprocess(NULL),
m_bindStyleTransfer(NULL),
bindings(swapChainEntryCount)
{
SetModels(w, h);
}
void SegmentModel::SetModels(UINT32 w, UINT32 h)
/**** Background blur model ****/
void BackgroundBlur::SetModels(int w, int h)
{
w /= g_scale; h /= g_scale;
SetImageSize(w, h);
auto fcnDevice = m_useGPU ? LearningModelDevice(LearningModelDeviceKind::DirectXHighPerformance) : LearningModelDevice(LearningModelDeviceKind::Default); // TODO: Add a toggle between GPU/CPU?
auto model = FCNResnet();
m_sessionPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
m_sessionPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1));
// Named dim override of FCN-Resnet to unlock fixed-input-size optimizations
auto fcnDevice = m_bUseGPU ? LearningModelDevice(LearningModelDeviceKind::DirectXHighPerformance) : LearningModelDevice(LearningModelDeviceKind::Default); // TODO: Add a toggle between GPU/CPU?
auto model = GetModel();
auto options = LearningModelSessionOptions();
options.BatchSizeOverride(0);
options.CloseModelOnSessionCreation(true);
// TODO: Input name vs. dimension name?
// ****** TODO: Input name vs. dimension name? *****
// Because the input name is "input" but we want to set dims 2 & 3 of that input
options.OverrideNamedDimension(L"height", m_imageHeightInPixels);
options.OverrideNamedDimension(L"width", m_imageWidthInPixels);
auto session = LearningModelSession(model, fcnDevice);
m_sessFCN = CreateLearningModelSession(FCNResnet());
m_session = LearningModelSession(model, fcnDevice, options);
m_sess = CreateLearningModelSession(Invert(1, 3, h, w));
m_sessStyleTransfer = CreateLearningModelSession(StyleTransfer());
m_bindStyleTransfer = LearningModelBinding(m_sessStyleTransfer);
// Initialize segmentation LearningModelSessions
m_sessPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
m_sessPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1));
// Initialize segmentation bindings
m_bindPreprocess = LearningModelBinding(m_sessPreprocess);
m_bindFCN = LearningModelBinding(m_sessFCN);
m_bindPostprocess = LearningModelBinding(m_sessPostprocess);
auto device = m_sessFCN.Device().Direct3D11Device();
// Create set of bindings to cycle through
for (int i = 0; i < swapChainEntryCount; i++) {
bindings.push_back(std::make_unique<SwapChainEntry>());
bindings[i]->binding_model = LearningModelBinding(m_sessFCN);
bindings[i]->binding_post = LearningModelBinding(m_sessPostprocess);
bindings[i]->bind_pre = LearningModelBinding(m_sessPreprocess);
bindings[i]->binding_post.Bind(L"OutputImage",
VideoFrame::CreateAsDirect3D11SurfaceBacked(Windows::Graphics::DirectX::DirectXPixelFormat::B8G8R8X8UIntNormalized, m_imageWidthInPixels, m_imageHeightInPixels));
bindings[i]->outputCache = VideoFrame::CreateAsDirect3D11SurfaceBacked(Windows::Graphics::DirectX::DirectXPixelFormat::B8G8R8X8UIntNormalized, m_imageWidthInPixels, m_imageHeightInPixels);
}
}
void SegmentModel::SetImageSize(UINT32 w, UINT32 h)
{
m_imageWidthInPixels = w;
m_imageHeightInPixels = h;
m_bindingPreprocess = LearningModelBinding(m_sessionPreprocess);
m_binding = LearningModelBinding(m_session);
m_bindingPostprocess = LearningModelBinding(m_sessionPostprocess);
}
void SegmentModel::Run(IDirect3DSurface src, IDirect3DSurface dest)
LearningModel BackgroundBlur::GetModel()
{
//OutputDebugString(L"\n [ Starting run | ");
// 1. Get input buffer as a VideoFrame
VideoFrame input = VideoFrame::CreateWithDirect3D11Surface(src);
VideoFrame output = VideoFrame::CreateWithDirect3D11Surface(dest);
auto rel = std::filesystem::current_path();
rel.append("Assets\\fcn-resnet50-12-int8.onnx");
return LearningModel::LoadFromFilePath(rel + L"");
}
void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
{
VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
SetVideoFrames(inVideoFrame, outVideoFrame);
auto device = m_sessFCN.Device().Direct3D11Device();
auto desc = input.Direct3DSurface().Description();
auto descOut = output.Direct3DSurface().Description();
VideoFrame input2 = VideoFrame::CreateAsDirect3D11SurfaceBacked(desc.Format, desc.Width/g_scale, desc.Height/g_scale, device);
VideoFrame output2 = VideoFrame::CreateAsDirect3D11SurfaceBacked(descOut.Format, descOut.Width/g_scale, descOut.Height/g_scale, device);
// Shape validation
OutputDebugString(std::to_wstring(m_inputVideoFrame.Direct3DSurface().Description().Height).c_str());
OutputDebugString(std::to_wstring(m_inputVideoFrame.Direct3DSurface().Description().Width).c_str());
assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
input.CopyToAsync(input2).get(); // TODO: I'm guessing it's this copy that's causing issues...
output.CopyToAsync(output2).get();
// 2. Preprocessing: z-score normalization
std::vector<int64_t> shape = { 1, 3, m_imageHeightInPixels, m_imageWidthInPixels };
ITensor intermediateTensor = TensorFloat::Create(shape);
hstring inputName = m_sessionPreprocess.Model().InputFeatures().GetAt(0).Name();
hstring outputName = m_sessionPreprocess.Model().OutputFeatures().GetAt(0).Name();
SubmitEval(input2, output);
swapChainIndex = (swapChainIndex + 1) % swapChainEntryCount;
m_bindingPreprocess.Bind(inputName, m_inputVideoFrame);
outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
m_bindingPreprocess.Bind(outputName, intermediateTensor, outputBindProperties);
m_sessionPreprocess.EvaluateAsync(m_bindingPreprocess, L"");
//timePassed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - now);
/*OutputDebugString(L" | Post: ");
OutputDebugString(std::to_wstring(timePassed.count()).c_str());*/
// 3. Run through actual model
std::vector<int64_t> FCNResnetOutputShape = { 1, 21, m_imageHeightInPixels, m_imageWidthInPixels };
ITensor FCNResnetOutput = TensorFloat::Create(FCNResnetOutputShape);
// Copy back to the correct surface for MFT
//output2.CopyToAsync(output).get();
m_binding.Bind(m_session.Model().InputFeatures().GetAt(0).Name(), intermediateTensor);
m_binding.Bind(m_session.Model().OutputFeatures().GetAt(0).Name(), FCNResnetOutput, outputBindProperties);
m_session.EvaluateAsync(m_binding, L"");
OutputDebugString(L" | Ending run ]");
// Shape validation
assert(m_outputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
assert(m_outputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
// 4. Postprocessing
outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(false));
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(0).Name(), m_inputVideoFrame); // InputImage
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame);
m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"").get();
m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
}
void SegmentModel::SubmitEval(VideoFrame input, VideoFrame output) {
auto currentBinding = bindings[0].get();
if (currentBinding->activetask == nullptr
|| currentBinding->activetask.Status() != Windows::Foundation::AsyncStatus::Started)
{
auto now = std::chrono::high_resolution_clock::now();
OutputDebugString(L"PF Start new Eval ");
OutputDebugString(std::to_wstring(swapChainIndex).c_str());
OutputDebugString(L" | ");
// submit an eval and wait for it to finish submitting work
{
std::lock_guard<std::mutex> guard{ Processing };
// 2. Preprocessing: z-score normalization
std::vector<int64_t> shape = { 1, 3, m_imageHeightInPixels, m_imageWidthInPixels };
ITensor intermediateTensor = TensorFloat::Create(shape);
hstring inputName = m_sessPreprocess.Model().InputFeatures().GetAt(0).Name();
hstring outputName = m_sessPreprocess.Model().OutputFeatures().GetAt(0).Name();
currentBinding->bind_pre.Bind(inputName, input);
outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
currentBinding->bind_pre.Bind(outputName, intermediateTensor, outputBindProperties);
m_sessPreprocess.EvaluateAsync(currentBinding->bind_pre, L"");
auto timePassed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - now);
OutputDebugString(L"Pre: ");
// 3. Run through actual model
std::vector<int64_t> FCNResnetOutputShape = { 1, 21, m_imageHeightInPixels, m_imageWidthInPixels };
ITensor FCNResnetOutput = TensorFloat::Create(FCNResnetOutputShape);
currentBinding->binding_model.Bind(m_sessFCN.Model().InputFeatures().GetAt(0).Name(), intermediateTensor);
currentBinding->binding_model.Bind(m_sessFCN.Model().OutputFeatures().GetAt(0).Name(), FCNResnetOutput, outputBindProperties);
m_sessFCN.EvaluateAsync(currentBinding->binding_model, L"");
OutputDebugString(L" | Model: ");
// 4. Postprocessing
ITensor rawLabels = TensorFloat::Create({ 1, 1, m_imageHeightInPixels, m_imageWidthInPixels });
outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(false));
currentBinding->binding_post.Bind(m_sessPostprocess.Model().InputFeatures().GetAt(0).Name(), input); // InputImage
currentBinding->binding_post.Bind(m_sessPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
}
std::rotate(bindings.begin(), bindings.begin() + 1, bindings.end());
finishedFrameIndex = (finishedFrameIndex - 1 + swapChainEntryCount) % swapChainEntryCount;
// Wait only for the last EvaluateAsync
currentBinding->activetask = m_sessPostprocess.EvaluateAsync(
currentBinding->binding_post,
std::to_wstring(swapChainIndex).c_str());
currentBinding->activetask.Completed([&, currentBinding, now](auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) {
OutputDebugString(L"PF Eval completed |");
//auto results = asyncInfo.GetResults().Outputs().Lookup(L"OutputImage");
VideoFrame evalOutput = asyncInfo.GetResults()
.Outputs()
.Lookup(L"OutputImage")
.try_as<VideoFrame>(); // Must have a VideoFrame bound to the output for WinML to cast to a VideoFrame
int bindingIdx;
bool finishedFrameUpdated;
{
std::lock_guard<std::mutex> guard{ Processing };
auto binding = std::find_if(bindings.begin(),
bindings.end(),
[currentBinding](const auto& b)
{
return b.get() == currentBinding;
});
bindingIdx = std::distance(bindings.begin(), binding);
finishedFrameUpdated = bindingIdx >= finishedFrameIndex;
finishedFrameIndex = finishedFrameUpdated ? bindingIdx : finishedFrameIndex;
}
if (finishedFrameUpdated)
{
OutputDebugString(L"PF Copy | ");
evalOutput.CopyToAsync(currentBinding->outputCache);
}
auto timePassed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - now);
// Convert to FPS: milli to seconds, invert
OutputDebugString(L"PF End ");
});
}
if (bindings[finishedFrameIndex]->outputCache != nullptr) {
OutputDebugString(L"\nStart CopyAsync ");
OutputDebugString(std::to_wstring(finishedFrameIndex).c_str());
{
// Lock so that multiple sources don't copy to the output at once
std::lock_guard<std::mutex> guard{ Processing };
bindings[finishedFrameIndex]->outputCache.CopyToAsync(output).get();
}
OutputDebugString(L" | Stop CopyAsync\n");
}
// Return without waiting for the submit to finish; set up the completion handler
}
void SegmentModel::RunStyleTransfer(IDirect3DSurface src, IDirect3DSurface dest)
{
OutputDebugString(L"\n[Starting RunStyleTransfer | ");
VideoFrame input = VideoFrame::CreateWithDirect3D11Surface(src);
VideoFrame output = VideoFrame::CreateWithDirect3D11Surface(dest);
auto desc = input.Direct3DSurface().Description();
auto descOut = output.Direct3DSurface().Description();
auto sessdevice = m_sessStyleTransfer.Device().Direct3D11Device();
VideoFrame output2 = VideoFrame::CreateAsDirect3D11SurfaceBacked(descOut.Format, 720, 720, sessdevice);
VideoFrame input2 = VideoFrame::CreateAsDirect3D11SurfaceBacked(desc.Format, 720,720, sessdevice);
input.CopyToAsync(input2).get(); // TODO: Can input stay the same if NV12?
output.CopyToAsync(output2).get();
desc = input2.Direct3DSurface().Description();
// TODO: To use the swap chain, uncomment these two lines.
/*SubmitEval(input2, output);
swapChainIndex = (++swapChainIndex) % swapChainEntryCount;*/
hstring inputName = m_sessStyleTransfer.Model().InputFeatures().GetAt(0).Name();
m_bindStyleTransfer.Bind(inputName, input2);
hstring outputName = m_sessStyleTransfer.Model().OutputFeatures().GetAt(0).Name();
auto outputBindProperties = PropertySet();
m_bindStyleTransfer.Bind(outputName, output2); // TODO: See if the VideoFrame from the MFT can be bound directly
auto results = m_sessStyleTransfer.Evaluate(m_bindStyleTransfer, L"");
output2.CopyToAsync(output).get(); // Should now be on the correct surface; verify, and consider returning the surface instead later
OutputDebugString(L" Ending RunStyleTransfer]");
}
void SegmentModel::RunTestDXGI(IDirect3DSurface src, IDirect3DSurface dest)
{
OutputDebugString(L"\n [ Starting runTest | ");
VideoFrame input = VideoFrame::CreateWithDirect3D11Surface(src);
VideoFrame output = VideoFrame::CreateWithDirect3D11Surface(dest);
auto desc = input.Direct3DSurface().Description();
auto descOut = output.Direct3DSurface().Description();
// TODO: Use a specific device for creation so resources don't pile up?
auto device = m_sess.Device().Direct3D11Device();
VideoFrame output2 = VideoFrame::CreateAsDirect3D11SurfaceBacked(descOut.Format, descOut.Width, descOut.Height, device);
VideoFrame input2 = VideoFrame::CreateAsDirect3D11SurfaceBacked(desc.Format, desc.Width, desc.Height, device);
input.CopyToAsync(input2).get(); // TODO: Can input stay the same if NV12?
output.CopyToAsync(output2).get();
desc = input2.Direct3DSurface().Description();
auto binding = LearningModelBinding(m_sess);
hstring inputName = m_sess.Model().InputFeatures().GetAt(0).Name();
binding.Bind(inputName, input2);
hstring outputName = m_sess.Model().OutputFeatures().GetAt(0).Name();
auto outputBindProperties = PropertySet();
binding.Bind(outputName, output2); // TODO: See if the VideoFrame from the MFT can be bound directly
auto results = m_sess.Evaluate(binding, L"");
output2.CopyToAsync(output).get(); // Should now be on the correct surface; verify, and consider returning the surface instead later
binding.Clear();
input.Close();
input2.Close();
output2.Close();
output.Close();
OutputDebugString(L" Ending runTest ]");
//printf(" Ending runtest %d]", i);
}
LearningModel SegmentModel::Invert(long n, long c, long h, long w)
{
auto builder = LearningModelBuilder::Create(11)
// Loading in buffers and reshape
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, { n, c, h, w }))
.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
.Operators().Add(LearningModelOperator(L"Mul")
.SetInput(L"A", L"Input")
.SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
//.SetConstant(L"B", TensorFloat::CreateFromIterable({3}, {0.114f, 0.587f, 0.299f}))
.SetOutput(L"C", L"MulOutput")
)
.Operators().Add(LearningModelOperator(L"Add")
.SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 255.f }))
.SetInput(L"B", L"MulOutput")
.SetOutput(L"C", L"Output")
)
;
return builder.CreateModel();
}
LearningModel SegmentModel::PostProcess(long n, long c, long h, long w, long axis)
LearningModel BackgroundBlur::PostProcess(long n, long c, long h, long w, long axis)
{
auto builder = LearningModelBuilder::Create(12)
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputImage", TensorKind::Float, { n, c, h, w }))
@@ -396,7 +162,7 @@ LearningModel SegmentModel::PostProcess(long n, long c, long h, long w, long axi
.SetAttribute(L"axis", TensorInt64Bit::CreateFromIterable({ 1 }, { axis })) // Correct way of passing axis?
.SetOutput(L"reduced", L"Reduced"))
.Operators().Add(LearningModelOperator(L"Cast")
.SetInput(L"input", L"Reduced")
.SetInput(L"input", L"Reduced")
.SetAttribute(L"to", TensorInt64Bit::CreateFromIterable({}, { OnnxDataType::ONNX_FLOAT }))
.SetOutput(L"output", L"ArgmaxOutput"))
// Extract the foreground using the argmax scores to create a mask
@@ -438,21 +204,32 @@ LearningModel SegmentModel::PostProcess(long n, long c, long h, long w, long axi
return builder.CreateModel();
}
LearningModel SegmentModel::FCNResnet()
LearningModel Invert(long n, long c, long h, long w)
{
auto rel = std::filesystem::current_path();
rel.append("Assets\\fcn-resnet50-12-int8.onnx");
return LearningModel::LoadFromFilePath(rel + L"");
auto builder = LearningModelBuilder::Create(11)
// Loading in buffers and reshape
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, { n, c, h, w }))
.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
.Operators().Add(LearningModelOperator(L"Mul")
.SetInput(L"A", L"Input")
.SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
//.SetConstant(L"B", TensorFloat::CreateFromIterable({3}, {0.114f, 0.587f, 0.299f}))
.SetOutput(L"C", L"MulOutput")
)
.Operators().Add(LearningModelOperator(L"Add")
.SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 255.f }))
.SetInput(L"B", L"MulOutput")
.SetOutput(L"C", L"Output")
)
;
return builder.CreateModel();
}
LearningModel SegmentModel::StyleTransfer()
{
auto rel = std::filesystem::current_path();
rel.append("Assets\\mosaic.onnx");
return LearningModel::LoadFromFilePath(rel + L"");
}
LearningModel SegmentModel::Normalize0_1ThenZScore(long h, long w, long c, const std::array<float, 3>& means, const std::array<float, 3>& stddev)
LearningModel Normalize0_1ThenZScore(long h, long w, long c, const std::array<float, 3>& means, const std::array<float, 3>& stddev)
{
assert(means.size() == c);
assert(stddev.size() == c);
@@ -483,7 +260,7 @@ LearningModel SegmentModel::Normalize0_1ThenZScore(long h, long w, long c, const
return builder.CreateModel();
}
LearningModel SegmentModel::ReshapeFlatBufferToNCHW(long n, long c, long h, long w)
LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w)
{
auto builder = LearningModelBuilder::Create(11)
// Loading in buffers and reshape
@@ -506,48 +283,3 @@ LearningModel SegmentModel::ReshapeFlatBufferToNCHW(long n, long c, long h, long
return builder.CreateModel();
}
LearningModelSession SegmentModel::CreateLearningModelSession(const LearningModel& model, bool closeModel) {
auto device = m_useGPU ? LearningModelDevice(LearningModelDeviceKind::DirectXHighPerformance) : LearningModelDevice(LearningModelDeviceKind::Default); // TODO: Add a toggle between GPU/CPU?
auto options = LearningModelSessionOptions();
options.BatchSizeOverride(0);
options.CloseModelOnSessionCreation(closeModel);
auto session = LearningModelSession(model, device);
return session;
}
void SegmentModel::EvaluateInternal(LearningModelSession sess, LearningModelBinding bind, bool wait)
{
auto results = sess.Evaluate(bind, L"");
/*auto results = sess.EvaluateAsync(bind, L"");
if (wait) {
results.GetResults(); // TODO: Will this actually wait?
}*/
}
LearningModelBinding SegmentModel::Evaluate(LearningModelSession& sess,const std::vector<ITensor*>& input, ITensor* output, bool wait)
{
auto binding = LearningModelBinding(sess);
for (int i = 0; i < input.size(); i++)
{
hstring inputName = sess.Model().InputFeatures().GetAt(i).Name();
binding.Bind(inputName, *input[i]);
}
hstring outputName = sess.Model().OutputFeatures().GetAt(0).Name();
auto outputBindProperties = PropertySet();
outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(!wait));
binding.Bind(outputName, *output, outputBindProperties);
//EvaluateInternal(sess, binding);
/*auto results = sess.Evaluate(binding, L"");
auto resultTensor = results.Outputs().Lookup(outputName).try_as<TensorFloat>();
float testPixels[6];
if (resultTensor) {
auto resultVector = resultTensor.GetAsVectorView();
resultVector.GetMany(0, testPixels);
}*/
return binding;
}

View file

@@ -13,82 +13,15 @@
#include <winrt/windows.foundation.collections.h>
#include <winrt/Windows.Media.h>
using namespace winrt::Microsoft::AI::MachineLearning;
using namespace winrt::Microsoft::AI::MachineLearning::Experimental;
using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
using namespace winrt::Windows::Media;
// Threading fields for style transfer
struct SwapChainEntry {
LearningModelBinding bind_pre;
LearningModelBinding binding_model;
LearningModelBinding binding_post;
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> activetask;
VideoFrame outputCache;
SwapChainEntry() :
bind_pre(nullptr),
binding_model(nullptr),
binding_post(nullptr),
activetask(nullptr),
outputCache(NULL) {}
};
class SegmentModel {
public:
LearningModelSession m_sess;
SegmentModel();
SegmentModel(UINT32 w, UINT32 h);
void SetModels(UINT32 w, UINT32 h);
void Run(IDirect3DSurface src, IDirect3DSurface dest);
void RunTestDXGI(IDirect3DSurface src, IDirect3DSurface dest);
void RunStyleTransfer(IDirect3DSurface src, IDirect3DSurface dest);
LearningModelSession CreateLearningModelSession(const LearningModel& model, bool closeModel = true);
void SetImageSize(UINT32 w, UINT32 h);
bool m_useGPU = true;
std::mutex Processing;
private:
// Stages of image blurring
LearningModel Normalize0_1ThenZScore(long height, long width, long channels, const std::array<float, 3>& means, const std::array<float, 3>& stddev);
LearningModel FCNResnet();
LearningModel PostProcess(long n, long c, long h, long w, long axis);
// Debugging models
LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w);
LearningModel Invert(long n, long c, long h, long w);
LearningModel StyleTransfer();
LearningModelBinding Evaluate(LearningModelSession& sess, const std::vector<ITensor*>& input, ITensor* output, bool wait = false);
void EvaluateInternal(LearningModelSession sess, LearningModelBinding bind, bool wait = false);
UINT32 m_imageWidthInPixels;
UINT32 m_imageHeightInPixels;
// Intermediate sessions need to be fully condensed later
LearningModelSession m_sessPreprocess;
LearningModelSession m_sessFCN;
LearningModelSession m_sessPostprocess;
LearningModelSession m_sessStyleTransfer;
LearningModelBinding m_bindPreprocess;
LearningModelBinding m_bindFCN;
LearningModelBinding m_bindPostprocess;
LearningModelBinding m_bindStyleTransfer;
// Threaded style transfer fields
void SubmitEval(VideoFrame, VideoFrame);
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> evalStatus;
std::vector <std::unique_ptr<SwapChainEntry>> bindings;
int swapChainIndex = 0;
int swapChainEntryCount = 5;
int finishedFrameIndex = 0;
};
// Model-agnostic helper LearningModels
LearningModel Normalize0_1ThenZScore(long height, long width, long channels, const std::array<float, 3>& means, const std::array<float, 3>& stddev);
LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w);
LearningModel Invert(long n, long c, long h, long w);
class IStreamModel
{
@@ -120,8 +53,6 @@ public:
}
protected:
//virtual winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> BindInputs(VideoFrame input) = 0;
void SetVideoFrames(VideoFrame inVideoFrame, VideoFrame outVideoFrame)
{
if (!m_bVideoFramesSet)
@@ -132,10 +63,12 @@ protected:
}
auto inDesc = inVideoFrame.Direct3DSurface().Description();
auto outDesc = outVideoFrame.Direct3DSurface().Description();
// TODO: field width/height instead?
// TODO: Set width/height for style transfer manually
m_inputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(inDesc.Format, 720, 720, m_device);
m_outputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(outDesc.Format, 720, 720, m_device);
/*
NOTE: VideoFrame::CreateAsDirect3D11SurfaceBacked takes arguments in (width, height) order
whereas every model created with LearningModelBuilder takes arguments in (height, width) order.
*/
m_inputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(inDesc.Format, m_imageWidthInPixels, m_imageHeightInPixels, m_device);
m_outputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(outDesc.Format, m_imageWidthInPixels, m_imageHeightInPixels, m_device);
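// A hedged example of the ordering difference the NOTE above describes (hypothetical 1280x720 frame):
//   VideoFrame::CreateAsDirect3D11SurfaceBacked(format, /*width*/ 1280, /*height*/ 720, device);
//   PostProcess(/*n*/ 1, /*c*/ 3, /*h*/ 720, /*w*/ 1280, /*axis*/ 1);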
m_bVideoFramesSet = true;
}
// TODO: Fix bug in WinML so that the surfaces from capture engine are shareable, remove copy.
@@ -156,16 +89,6 @@ protected:
auto session = LearningModelSession(model, device, options);
return session;
}
//// Threaded style transfer fields
//void SubmitEval(VideoFrame, VideoFrame);
//winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> evalStatus;
//std::vector <std::unique_ptr<SwapChainEntry>> bindings;
//int swapChainIndex = 0;
//int swapChainEntryCount = 5;
//int finishedFrameIndex = 0;
bool m_bUseGPU = true;
bool m_bVideoFramesSet = false;
VideoFrame m_inputVideoFrame,
@@ -181,6 +104,8 @@ protected:
};
// TODO: Make an even more minimal Invert IStreamModel?
class StyleTransfer : public IStreamModel {
public:
StyleTransfer(int w, int h) : IStreamModel(w, h) {
@@ -191,3 +116,36 @@ public:
private:
LearningModel GetModel();
};
class BackgroundBlur : public IStreamModel
{
public:
BackgroundBlur(int w, int h) :
IStreamModel(w, h),
m_sessionPreprocess(NULL),
m_sessionPostprocess(NULL),
m_bindingPreprocess(NULL),
m_bindingPostprocess(NULL)
{
SetModels(w, h);
}
BackgroundBlur() :
IStreamModel(),
m_sessionPreprocess(NULL),
m_sessionPostprocess(NULL),
m_bindingPreprocess(NULL),
m_bindingPostprocess(NULL)
{};
void SetModels(int w, int h);
void Run(IDirect3DSurface src, IDirect3DSurface dest);
private:
LearningModel GetModel();
LearningModel PostProcess(long n, long c, long h, long w, long axis);
// Background blur-specific sessions, bindings
LearningModelSession m_sessionPreprocess;
LearningModelSession m_sessionPostprocess;
LearningModelBinding m_bindingPreprocess;
LearningModelBinding m_bindingPostprocess;
};
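For context, a minimal usage sketch of the new BackgroundBlur class as declared above (the 1280x720 size and the surface names are hypothetical, not from this commit):

BackgroundBlur blur(1280, 720);     // hypothetical input frame size; calls SetModels(w, h)
blur.Run(srcSurface, destSurface);  // IDirect3DSurface pair supplied by the MFT, as in TransformAsync below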

View file

@@ -748,7 +748,7 @@ HRESULT TransformAsync::InitializeTransform(void)
// Set up circular queue of IStreamModels
for (int i = 0; i < m_numThreads; i++) {
// TODO: maybe StyleTransfer is the default model, but have this change with user input
m_models.push_back(std::make_unique<StyleTransfer>());
m_models.push_back(std::make_unique<BackgroundBlur>());
}
done: