No immediate crashes; still need to clean up and make sure there are no race conditions
This commit is contained in:
Родитель
f1b3724c70
Коммит
02a530a0d5
|
@ -73,6 +73,11 @@ void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest)
|
|||
|
||||
m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
|
||||
}
|
||||
// Asynchronous entry point for StyleTransfer. Currently a stub: src and dest
// are not used, and the only effect is resetting m_evalStatus.
void StyleTransfer::RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest)
|
||||
{
|
||||
// TODO: Implement async StyleTransfer
|
||||
// Reset the evaluation handle with nullptr (not the NULL macro) so the
// assignment is unambiguous and matches nullptr comparisons on this member.
m_evalStatus = nullptr;
|
||||
}
|
||||
LearningModel StyleTransfer::GetModel()
|
||||
{
|
||||
auto rel = std::filesystem::current_path();
|
||||
|
@ -162,7 +167,7 @@ void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
|
|||
m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
|
||||
}
|
||||
|
||||
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> BackgroundBlur::RunAsync(IDirect3DSurface src, IDirect3DSurface dest)
|
||||
void BackgroundBlur::RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest)
|
||||
{
|
||||
assert(m_session.Device().AdapterId() == nvidia);
|
||||
VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
|
||||
|
@ -205,6 +210,11 @@ winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> Backg
|
|||
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
|
||||
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame);
|
||||
m_evalStatus = m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"");
|
||||
// m_evalStatus = m_outputVideoFrame.CopyToAsync(outVideoFrame);
|
||||
/*auto makeOutput = [&outVideoFrame]() -> winrt::Windows::Foundation::IAsyncOperation<VideoFrame> { co_return outVideoFrame; };
|
||||
m_evalStatus = makeOutput();*/
|
||||
|
||||
// todo: go back to have this return AsyncStatus for when done with copytoasync?
|
||||
//return m_outputVideoFrame.CopyToAsync(outVideoFrame);
|
||||
}
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ public:
|
|||
};
|
||||
virtual void SetModels(int w, int h) =0;
|
||||
virtual void Run(IDirect3DSurface src, IDirect3DSurface dest) =0;
|
||||
virtual void RunAsync(IDirect3DSurface src, IDirect3DSurface dest) = 0;
|
||||
virtual void RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest) = 0;
|
||||
|
||||
void SetUseGPU(bool use) {
|
||||
m_bUseGPU = use;
|
||||
|
@ -67,6 +67,7 @@ public:
|
|||
}
|
||||
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> m_evalStatus;
|
||||
VideoFrame m_outputVideoFrame;
|
||||
std::mutex Processing;
|
||||
|
||||
protected:
|
||||
winrt::Windows::Graphics::DisplayAdapterId nvidia{};
|
||||
|
@ -109,9 +110,10 @@ protected:
|
|||
auto session = LearningModelSession(model, device, options);
|
||||
return session;
|
||||
}
|
||||
|
||||
bool m_bUseGPU = true;
|
||||
bool m_bVideoFramesSet = false;
|
||||
VideoFrame m_inputVideoFrame,
|
||||
VideoFrame m_inputVideoFrame;
|
||||
|
||||
UINT32 m_imageWidthInPixels;
|
||||
UINT32 m_imageHeightInPixels;
|
||||
|
@ -143,7 +145,7 @@ public:
|
|||
StyleTransfer() : IStreamModel() {};
|
||||
void SetModels(int w, int h);
|
||||
void Run(IDirect3DSurface src, IDirect3DSurface dest);
|
||||
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> RunAsync(IDirect3DSurface src, IDirect3DSurface dest);
|
||||
void RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest);
|
||||
private:
|
||||
LearningModel GetModel();
|
||||
};
|
||||
|
@ -170,7 +172,7 @@ public:
|
|||
{};
|
||||
void SetModels(int w, int h);
|
||||
void Run(IDirect3DSurface src, IDirect3DSurface dest);
|
||||
virtual void RunAsync(IDirect3DSurface src, IDirect3DSurface dest) = 0;
|
||||
void RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest);
|
||||
|
||||
private:
|
||||
LearningModel GetModel();
|
||||
|
|
|
@ -193,19 +193,21 @@ done:
|
|||
|
||||
}
|
||||
|
||||
HRESULT TransformAsync::SubmitEval(IMFSample* pInputSample)
|
||||
HRESULT TransformAsync::SubmitEval(IMFSample* pInput)
|
||||
{
|
||||
HRESULT hr = S_OK;
|
||||
winrt::com_ptr<IMFSample> pOutputSample;
|
||||
DWORD dwCurrentSample = InterlockedIncrement(&m_ulSampleCounter); // todo: set at the end of a call
|
||||
int modelIndex = dwCurrentSample % m_numThreads;
|
||||
IDirect3DSurface src, dest;
|
||||
VideoFrame outVideoFrame = NULL;
|
||||
winrt::com_ptr<IMFSample> pInputSample;
|
||||
pInputSample.copy_from(pInput); // TODO: Attach or copy_from?
|
||||
|
||||
winrt::com_ptr<IDXGIDevice> pDXGIDevice{ m_spDevice.as<IDXGIDevice>() };
|
||||
winrt::com_ptr<IDXGIAdapter> pAdapter;
|
||||
DXGI_ADAPTER_DESC desc;
|
||||
|
||||
auto model = m_models[0].get(); // Lock on?
|
||||
//pInputSample attributes to copy over to pOutputSample
|
||||
LONGLONG hnsDuration = 0;
|
||||
LONGLONG hnsTime = 0;
|
||||
UINT64 pun64MarkerID = 0;
|
||||
|
||||
TRACE((L"\n[Sample: %d | model: %d | ", dwCurrentSample, modelIndex));
|
||||
|
||||
|
@ -231,10 +233,16 @@ HRESULT TransformAsync::SubmitEval(IMFSample* pInputSample)
|
|||
CHECK_HR(hr = E_INVALIDARG);
|
||||
}
|
||||
|
||||
// Explicitly copy out pInput attributes, since copy doesn't do this for us
|
||||
CHECK_HR(pInputSample->GetSampleDuration(&hnsDuration));
|
||||
CHECK_HR(pInputSample->GetSampleTime(&hnsTime));
|
||||
pInputSample->GetUINT64(TransformAsync_MFSampleExtension_Marker, &pun64MarkerID);
|
||||
|
||||
// **** 2. Run inference on input sample
|
||||
src = SampleToD3Dsurface(pInputSample);
|
||||
src = SampleToD3Dsurface(pInputSample.get());
|
||||
dest = SampleToD3Dsurface(pOutputSample.get());
|
||||
VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
|
||||
outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
|
||||
auto model = m_models[0].get(); // Lock on?
|
||||
|
||||
// Check if model already has an active task going
|
||||
if(model->m_evalStatus == nullptr || model->m_evalStatus.Status() != winrt::Windows::Foundation::AsyncStatus::Started)
|
||||
|
@ -242,78 +250,56 @@ HRESULT TransformAsync::SubmitEval(IMFSample* pInputSample)
|
|||
// Do the copies inside runtest
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
// TODO: Keep track of finishedFrameIndex
|
||||
|
||||
model->RunAsync(src, dest); // TODO: Do I need a lock on this because of binding?
|
||||
{
|
||||
// Lock on model specifically instead?
|
||||
std::lock_guard<std::mutex> guard{ model->Processing };
|
||||
model->RunAsync(src, dest); // TODO: Do I need a lock on this because of binding? Probably: what if n=1 and try again to bind?
|
||||
}
|
||||
std::rotate(m_models.begin(), m_models.begin() + 1, m_models.end()); // Put most recently used model at the back
|
||||
finishedFrameIndex = (finishedFrameIndex - 1 + m_numThreads) % m_numThreads;
|
||||
model->m_evalStatus.Completed([&](auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) {
|
||||
model->m_evalStatus.Completed([this, src, dest, raw=pOutputSample.get(), pInputSample, hnsDuration, hnsTime, pun64MarkerID, outVideoFrame] (auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) mutable {
|
||||
|
||||
OutputDebugString(L"Eval Complete");
|
||||
VideoFrame output = asyncInfo.GetResults().Outputs().Lookup(L"OutputImage").try_as<VideoFrame>();
|
||||
|
||||
// TODO: Do I need all this finished frame check stuff?
|
||||
// TODO: Can just call copytoasync synchronously?
|
||||
int bindingIdx;
|
||||
bool finishedFrameUpdated;
|
||||
VideoFrame output = asyncInfo.GetResults().Outputs()
|
||||
.Lookup(L"OutputImage")
|
||||
.try_as<VideoFrame>();; // Will it have copied to the correct surface?
|
||||
if (output)
|
||||
{
|
||||
std::lock_guard<mutex> guard{ Processing };
|
||||
auto modelFind = std::find_if(m_models.begin(), m_models.end(),
|
||||
[model](const auto& b) {
|
||||
return b.get() == model;
|
||||
}
|
||||
bindingIdx = std::distance(bindings.begin(), modelFind);
|
||||
finishedFrameUpdated = bindingIdx >= finishedFrameIndex;
|
||||
finishedFrameIndex = finishedFrameUpdated ? bindingIdx : finishedFrameIndex;
|
||||
}
|
||||
if (finishedFrameUpdated)
|
||||
{
|
||||
output.CopyToAsync(model->m_outputVideoFrame); // keeps async here so maybe that's why we need
|
||||
output.CopyToAsync(outVideoFrame).get();
|
||||
outVideoFrame.Close();
|
||||
}
|
||||
|
||||
auto timePassed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - now);
|
||||
if (dwCurrentSample > 1) runningAverage += (timePassed.count() - runningAverage) / dwCurrentSample;
|
||||
TRACE((L"Runtime : %f", runningAverage));
|
||||
//output.CopyToAsync(model->m_outputVideoFrame).get(); // TODO: will this link correctly with dest id3d?
|
||||
|
||||
// TODO: Can set pOutputSample as well so that just copy everything by value in lambda capture clause
|
||||
FinishEval(pInputSample, raw, src, dest, hnsDuration, hnsTime, pun64MarkerID); // Trying to maintain the lifetime of the whole sample
|
||||
});
|
||||
}
|
||||
if (model->m_outputVideoFrame != nullptr)
|
||||
{
|
||||
// TODO: I don't think this needs a lock bc each call to submit eval is specific to a frame
|
||||
// Lock so that don't have multiple sources copying to output at once
|
||||
// std::lock_guard<std::mutex> guard{ Processing };
|
||||
model->m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
|
||||
// TODO: Does this need to be locked?
|
||||
FinishEval(pInputSample, pOutputSample, src, dest);
|
||||
}
|
||||
|
||||
|
||||
|
||||
done:
|
||||
return hr;
|
||||
}
|
||||
|
||||
HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSample> pOutputSample,
|
||||
IDirect3DSurface src, IDirect3DSurface dest)
|
||||
// Try as const ref
|
||||
HRESULT TransformAsync::FinishEval(winrt::com_ptr<IMFSample> pInputSample, IMFSample* pOutput,
|
||||
IDirect3DSurface src, IDirect3DSurface dest, LONGLONG hnsDuration, LONGLONG hnsTime, UINT64 pun64MarkerID)
|
||||
{
|
||||
LONGLONG hnsDuration = 0;
|
||||
LONGLONG hnsTime = 0;
|
||||
UINT64 pun64MarkerID = 0;
|
||||
|
||||
HRESULT hr = S_OK;
|
||||
winrt::com_ptr<IMFMediaBuffer> pMediaBuffer;
|
||||
winrt::com_ptr<IMFMediaEvent> pHaveOutputEvent;
|
||||
|
||||
winrt::com_ptr<IMFSample> pOutputSample;
|
||||
pOutputSample.copy_from(pOutput);
|
||||
|
||||
src.Close();
|
||||
dest.Close();
|
||||
|
||||
// **** 3. Set up the output sample
|
||||
// CHECK_HR(hr = DuplicateAttributes(pOutputSample.get(), pInputSample));
|
||||
if (SUCCEEDED(pInputSample->GetSampleDuration(&hnsDuration)))
|
||||
{
|
||||
CHECK_HR(hr = pOutputSample->SetSampleDuration(hnsDuration));
|
||||
}
|
||||
if (SUCCEEDED(pInputSample->GetSampleTime(&hnsTime)))
|
||||
{
|
||||
CHECK_HR(hr = pOutputSample->SetSampleTime(hnsTime));
|
||||
// todo: incrememt m_
|
||||
}
|
||||
CHECK_HR(hr = pOutputSample->SetSampleDuration(hnsDuration));
|
||||
CHECK_HR(hr = pOutputSample->SetSampleTime(hnsTime));
|
||||
|
||||
|
||||
// Always set the output buffer size!
|
||||
CHECK_HR(hr = pOutputSample->GetBufferByIndex(0, pMediaBuffer.put()));
|
||||
|
@ -322,7 +308,7 @@ HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSa
|
|||
if(m_bFirstSample != FALSE)
|
||||
{
|
||||
// TODO: What if make not discontinuity?
|
||||
// CHECK_HR(hr = pOutputSample->SetUINT32(MFSampleExtension_Discontinuity, TRUE));
|
||||
CHECK_HR(hr = pOutputSample->SetUINT32(MFSampleExtension_Discontinuity, TRUE));
|
||||
m_bFirstSample = FALSE;
|
||||
}
|
||||
|
||||
|
@ -338,7 +324,7 @@ HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSa
|
|||
m_dwStatus |= MYMFT_STATUS_OUTPUT_SAMPLE_READY;
|
||||
}
|
||||
|
||||
if (pInputSample->GetUINT64(TransformAsync_MFSampleExtension_Marker, &pun64MarkerID) == S_OK)
|
||||
if (pun64MarkerID)
|
||||
{
|
||||
// This input sample is flagged as a marker
|
||||
winrt::com_ptr<IMFMediaEvent> pMarkerEvent;
|
||||
|
@ -351,7 +337,7 @@ HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSa
|
|||
CHECK_HR(hr = RequestSample(0));
|
||||
|
||||
done:
|
||||
|
||||
// TODO: Close pInput/pOutput
|
||||
return hr;
|
||||
}
|
||||
|
||||
|
|
|
@ -222,8 +222,8 @@ public:
|
|||
#pragma endregion IMFAsyncCallback
|
||||
|
||||
HRESULT SubmitEval(IMFSample* pInputSample);
|
||||
HRESULT FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSample> pOutputSample,
|
||||
IDirect3DSurface src, IDirect3DSurface dest);
|
||||
HRESULT FinishEval(winrt::com_ptr<IMFSample> pInputSample, IMFSample* pOutput,
|
||||
IDirect3DSurface src, IDirect3DSurface dest, LONGLONG hnsDuration, LONGLONG hnsTime, UINT64 pun64MarkerID);
|
||||
|
||||
protected:
|
||||
TransformAsync(HRESULT& hr);
|
||||
|
@ -312,7 +312,7 @@ protected:
|
|||
std::unique_ptr<IStreamModel> m_streamModel; // TODO: Keep a vector of stream models?
|
||||
// TODO: Prob needs to be a vector so can dynamically allocate based on what numThreads ends up as.
|
||||
std::vector<std::unique_ptr<IStreamModel>> m_models;
|
||||
int m_numThreads =3;
|
||||
int m_numThreads =2;
|
||||
int finishedFrameIndex = 0;
|
||||
std::mutex Processing;
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче