From c165e0bdb084b101f4fe794027d387f364c7e54d Mon Sep 17 00:00:00 2001 From: Linnea May Date: Fri, 15 Apr 2022 13:32:34 -0700 Subject: [PATCH] address comments in segmentmodel --- Samples/BackgroundBlur/.gitignore | 1 - .../BackgroundBlur/SegmentModel.cpp | 35 +++++------ .../BackgroundBlur/SegmentModel.h | 60 +++++++------------ .../BackgroundBlur/TransformAsync.cpp | 6 +- .../BackgroundBlur/TransformAsync.h | 8 +-- 5 files changed, 44 insertions(+), 66 deletions(-) delete mode 100644 Samples/BackgroundBlur/.gitignore diff --git a/Samples/BackgroundBlur/.gitignore b/Samples/BackgroundBlur/.gitignore deleted file mode 100644 index fac783a0..00000000 --- a/Samples/BackgroundBlur/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.onnx \ No newline at end of file diff --git a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp index 0db5a2d5..4e216078 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp +++ b/Samples/BackgroundBlur/BackgroundBlur/SegmentModel.cpp @@ -38,11 +38,10 @@ enum OnnxDataType : long { }OnnxDataType; -int g_scale = 5; -auto outputBindProperties = PropertySet(); +const int32_t opset = 12; /**** Style transfer model ****/ -void StyleTransfer::SetModels(int w, int h) +void StyleTransfer::InitializeSession(int w, int h) { // TODO: Use w/h or use the 720x720 of the mode SetImageSize(720, 720); // SIze model input sizes fixed to 720x720 @@ -53,7 +52,6 @@ void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest) { m_bSyncStarted = TRUE; - assert(m_session.Device().AdapterId() == m_highPerfAdapter); VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src); VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest); SetVideoFrames(inVideoFrame, outVideoFrame); @@ -72,11 +70,12 @@ void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest) m_bSyncStarted = FALSE; } + LearningModel StyleTransfer::GetModel() { - auto rel = std::filesystem::current_path(); - rel.append("Assets\\mosaic.onnx"); - return LearningModel::LoadFromFilePath(rel + L""); + auto model_path = std::filesystem::current_path(); + model_path.append("Assets\\mosaic.onnx"); + return LearningModel::LoadFromFilePath(model_path.c_str()); } @@ -85,9 +84,9 @@ BackgroundBlur::~BackgroundBlur() { if (m_session) m_session.Close(); } -void BackgroundBlur::SetModels(int w, int h) +void BackgroundBlur::InitializeSession(int w, int h) { - w /= g_scale; h /= g_scale; + w /= m_scale; h /= m_scale; SetImageSize(w, h); auto joinOptions1 = LearningModelJoinOptions(); @@ -103,7 +102,7 @@ void BackgroundBlur::SetModels(int w, int h) joinOptions2.Link(L"FCN_out", L"InputScores"); joinOptions2.Link(L"OutputImageForward", L"InputImage"); joinOptions2.JoinedNodePrefix(L"Post_"); - //joinOptions2.PromoteUnlinkedOutputsToFusedOutputs(false); // Causes winrt originate error in FusedGraphKernel.cpp, but works on CPU + //joinOptions2.PromoteUnlinkedOutputsToFusedOutputs(false); // TODO: Causes winrt originate error in FusedGraphKernel.cpp, but works on CPU auto modelExperimental2 = LearningModelExperimental(intermediateModel); LearningModel modelFused = modelExperimental2.JoinModel(PostProcess(1, 3, h, w, 1), joinOptions2); @@ -115,16 +114,15 @@ void BackgroundBlur::SetModels(int w, int h) } LearningModel BackgroundBlur::GetModel() { - auto rel = std::filesystem::current_path(); - rel.append("Assets\\fcn-resnet50-12.onnx"); - return LearningModel::LoadFromFilePath(rel + L""); + auto model_path = std::filesystem::current_path(); + model_path.append("Assets\\fcn-resnet50-12.onnx"); + return LearningModel::LoadFromFilePath(model_path.c_str()); } void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest) { m_bSyncStarted = TRUE; // Device validation - assert(m_session.Device().AdapterId() == m_highPerfAdapter); VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src); VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest); SetVideoFrames(inVideoFrame, outVideoFrame); @@ -145,7 +143,7 @@ void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest) LearningModel BackgroundBlur::PostProcess(long n, long c, long h, long w, long axis) { - auto builder = LearningModelBuilder::Create(12) + auto builder = LearningModelBuilder::Create(opset) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputImage", TensorKind::Float, { n, c, h, w })) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputScores", TensorKind::Float, { -1, -1, h, w })) // Different input type? .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImage", TensorKind::Float, { n, c, h, w })) @@ -204,8 +202,7 @@ LearningModel BackgroundBlur::PostProcess(long n, long c, long h, long w, long a LearningModel Invert(long n, long c, long h, long w) { - - auto builder = LearningModelBuilder::Create(11) + auto builder = LearningModelBuilder::Create(opset) // Loading in buffers and reshape .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, { n, c, h, w })) .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w })) @@ -230,7 +227,7 @@ LearningModel Normalize0_1ThenZScore(long h, long w, long c, const std::arrayCreateFence(m_fenceValue, flag, __uuidof(ID3D11Fence), m_spFence.put_void()); // Probably don't need to save the event for the first frame to render, since that will be long anyways w first Eval/Bind. - // Actually prob will be long for the first little bit anyways bc of each IStreamModel to select, but oh well. It'll be fine. + // Actually prob will be long for the first little bit anyways bc of each StreamModelBase to select, but oh well. It'll be fine. } else { @@ -799,7 +799,7 @@ HRESULT TransformAsync::InitializeTransform(void) CHECK_HR(hr = CSampleQueue::Create(&m_pOutputSampleQueue)); - // Set up circular queue of IStreamModels + // Set up circular queue of StreamModelBases for (int i = 0; i < m_numThreads; i++) { // TODO: Have a dialogue to select which model to select for real-time inference. m_models.push_back(std::make_unique()); @@ -898,7 +898,7 @@ HRESULT TransformAsync::UpdateFormatInfo() // Set the size of the SegmentModel for (int i = 0; i < m_numThreads; i++) { - m_models[i]->SetModels(m_imageWidthInPixels, m_imageHeightInPixels); + m_models[i]->InitializeSession(m_imageWidthInPixels, m_imageHeightInPixels); } } diff --git a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h index 9e6565e6..93fa082c 100644 --- a/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h +++ b/Samples/BackgroundBlur/BackgroundBlur/TransformAsync.h @@ -226,7 +226,7 @@ public: HRESULT NotifyRelease(); #pragma endregion IMFVideoSampleAllocatorNotify - // Uses the next available IStreamModel to run inference on pInputSample + // Uses the next available StreamModelBase to run inference on pInputSample // and allocates a transformed output sample. HRESULT SubmitEval(IMFSample* pInputSample); @@ -268,7 +268,7 @@ protected: HRESULT OnSetD3DManager(ULONG_PTR ulParam); // After the input type is set, update MFT format information and sets - // IStreamModel input sizes. + // StreamModelBase input sizes. HRESULT UpdateFormatInfo(); // Sets up the output sample allocator. @@ -309,7 +309,7 @@ protected: com_ptr m_spAttributes; // MFT Attributes. com_ptr m_spAllocatorAttributes;// Output sample allocator attributes. bool m_bAllocatorInitialized;// True if sample allocator has been initialized. - volatile ULONG m_ulSampleCounter; // Frame number, can use to pick a IStreamModel. + volatile ULONG m_ulSampleCounter; // Frame number, can use to pick a StreamModelBase. volatile ULONG m_ulProcessedFrameNum; // Number of frames we've processed. volatile ULONG m_currFrameNumber; // The current frame to be processed. @@ -347,7 +347,7 @@ protected: // Model Inference fields int m_numThreads = // Number of threads running inference in parallel. max(std::thread::hardware_concurrency(), 5); - std::vector> m_models; // m_numThreads number of models to run inference in parallel. + std::vector> m_models; // m_numThreads number of models to run inference in parallel. int modelIndex = 0; }; \ No newline at end of file