No immediate crashes, need to clean up and make sure no race conditiosn

This commit is contained in:
Linnea May 2022-02-22 14:31:00 -08:00
Родитель f1b3724c70
Коммит 02a530a0d5
4 изменённых файлов: 66 добавлений и 68 удалений

Просмотреть файл

@ -73,6 +73,11 @@ void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest)
m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
}
void StyleTransfer::RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest)
{
// TODO: Implement async StyleTransfer
m_evalStatus = NULL;
}
LearningModel StyleTransfer::GetModel()
{
auto rel = std::filesystem::current_path();
@ -162,7 +167,7 @@ void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
}
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> BackgroundBlur::RunAsync(IDirect3DSurface src, IDirect3DSurface dest)
void BackgroundBlur::RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest)
{
assert(m_session.Device().AdapterId() == nvidia);
VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
@ -205,6 +210,11 @@ winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> Backg
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().InputFeatures().GetAt(1).Name(), FCNResnetOutput); // InputScores
m_bindingPostprocess.Bind(m_sessionPostprocess.Model().OutputFeatures().GetAt(0).Name(), m_outputVideoFrame);
m_evalStatus = m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"");
// m_evalStatus = m_outputVideoFrame.CopyToAsync(outVideoFrame);
/*auto makeOutput = [&outVideoFrame]() -> winrt::Windows::Foundation::IAsyncOperation<VideoFrame> { co_return outVideoFrame; };
m_evalStatus = makeOutput();*/
// todo: go back to have this return AsyncStatus for when done with copytoasync?
//return m_outputVideoFrame.CopyToAsync(outVideoFrame);
}

Просмотреть файл

@ -54,7 +54,7 @@ public:
};
virtual void SetModels(int w, int h) =0;
virtual void Run(IDirect3DSurface src, IDirect3DSurface dest) =0;
virtual void RunAsync(IDirect3DSurface src, IDirect3DSurface dest) = 0;
virtual void RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest) = 0;
void SetUseGPU(bool use) {
m_bUseGPU = use;
@ -67,6 +67,7 @@ public:
}
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> m_evalStatus;
VideoFrame m_outputVideoFrame;
std::mutex Processing;
protected:
winrt::Windows::Graphics::DisplayAdapterId nvidia{};
@ -109,9 +110,10 @@ protected:
auto session = LearningModelSession(model, device, options);
return session;
}
bool m_bUseGPU = true;
bool m_bVideoFramesSet = false;
VideoFrame m_inputVideoFrame,
VideoFrame m_inputVideoFrame;
UINT32 m_imageWidthInPixels;
UINT32 m_imageHeightInPixels;
@ -143,7 +145,7 @@ public:
StyleTransfer() : IStreamModel() {};
void SetModels(int w, int h);
void Run(IDirect3DSurface src, IDirect3DSurface dest);
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> RunAsync(IDirect3DSurface src, IDirect3DSurface dest);
void RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest);
private:
LearningModel GetModel();
};
@ -170,7 +172,7 @@ public:
{};
void SetModels(int w, int h);
void Run(IDirect3DSurface src, IDirect3DSurface dest);
virtual void RunAsync(IDirect3DSurface src, IDirect3DSurface dest) = 0;
void RunAsync(IDirect3DSurface& src, IDirect3DSurface& dest);
private:
LearningModel GetModel();

Просмотреть файл

@ -193,19 +193,21 @@ done:
}
HRESULT TransformAsync::SubmitEval(IMFSample* pInputSample)
HRESULT TransformAsync::SubmitEval(IMFSample* pInput)
{
HRESULT hr = S_OK;
winrt::com_ptr<IMFSample> pOutputSample;
DWORD dwCurrentSample = InterlockedIncrement(&m_ulSampleCounter); // todo: set at the end of a call
int modelIndex = dwCurrentSample % m_numThreads;
IDirect3DSurface src, dest;
VideoFrame outVideoFrame = NULL;
winrt::com_ptr<IMFSample> pInputSample;
pInputSample.copy_from(pInput); // TODO: Attach or copy_from?
winrt::com_ptr<IDXGIDevice> pDXGIDevice{ m_spDevice.as<IDXGIDevice>() };
winrt::com_ptr<IDXGIAdapter> pAdapter;
DXGI_ADAPTER_DESC desc;
auto model = m_models[0].get(); // Lock on?
//pInputSample attributes to copy over to pOutputSample
LONGLONG hnsDuration = 0;
LONGLONG hnsTime = 0;
UINT64 pun64MarkerID = 0;
TRACE((L"\n[Sample: %d | model: %d | ", dwCurrentSample, modelIndex));
@ -231,10 +233,16 @@ HRESULT TransformAsync::SubmitEval(IMFSample* pInputSample)
CHECK_HR(hr = E_INVALIDARG);
}
// Explicitly copy out pInput attributes, since copy doesn't do this for us
CHECK_HR(pInputSample->GetSampleDuration(&hnsDuration));
CHECK_HR(pInputSample->GetSampleTime(&hnsTime));
pInputSample->GetUINT64(TransformAsync_MFSampleExtension_Marker, &pun64MarkerID);
// **** 2. Run inference on input sample
src = SampleToD3Dsurface(pInputSample);
src = SampleToD3Dsurface(pInputSample.get());
dest = SampleToD3Dsurface(pOutputSample.get());
VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
auto model = m_models[0].get(); // Lock on?
// Check if model already has an active task going
if(model->m_evalStatus == nullptr || model->m_evalStatus.Status() != winrt::Windows::Foundation::AsyncStatus::Started)
@ -242,78 +250,56 @@ HRESULT TransformAsync::SubmitEval(IMFSample* pInputSample)
// Do the copies inside runtest
auto now = std::chrono::high_resolution_clock::now();
// TODO: Keep track of finishedFrameIndex
model->RunAsync(src, dest); // TODO: Do I need a lock on this because of binding?
{
// Lock on model specifically instead?
std::lock_guard<std::mutex> guard{ model->Processing };
model->RunAsync(src, dest); // TODO: Do I need a lock on this because of binding? Probably: what if n=1 and try again to bind?
}
std::rotate(m_models.begin(), m_models.begin() + 1, m_models.end()); // Put most recently used model at the back
finishedFrameIndex = (finishedFrameIndex - 1 + m_numThreads) % m_numThreads;
model->m_evalStatus.Completed([&](auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) {
model->m_evalStatus.Completed([this, src, dest, raw=pOutputSample.get(), pInputSample, hnsDuration, hnsTime, pun64MarkerID, outVideoFrame] (auto&& asyncInfo, winrt::Windows::Foundation::AsyncStatus const) mutable {
OutputDebugString(L"Eval Complete");
VideoFrame output = asyncInfo.GetResults().Outputs().Lookup(L"OutputImage").try_as<VideoFrame>();
// TODO: Do I need all this finished frame check stuff?
// TODO: Can just call copytoasync synchronously?
int bindingIdx;
bool finishedFrameUpdated;
VideoFrame output = asyncInfo.GetResults().Outputs()
.Lookup(L"OutputImage")
.try_as<VideoFrame>();; // Will it have copied to the correct surface?
if (output)
{
std::lock_guard<mutex> guard{ Processing };
auto modelFind = std::find_if(m_models.begin(), m_models.end(),
[model](const auto& b) {
return b.get() == model;
}
bindingIdx = std::distance(bindings.begin(), modelFind);
finishedFrameUpdated = bindingIdx >= finishedFrameIndex;
finishedFrameIndex = finishedFrameUpdated ? bindingIdx : finishedFrameIndex;
}
if (finishedFrameUpdated)
{
output.CopyToAsync(model->m_outputVideoFrame); // keeps async here so maybe that's why we need
output.CopyToAsync(outVideoFrame).get();
outVideoFrame.Close();
}
auto timePassed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - now);
if (dwCurrentSample > 1) runningAverage += (timePassed.count() - runningAverage) / dwCurrentSample;
TRACE((L"Runtime : %f", runningAverage));
//output.CopyToAsync(model->m_outputVideoFrame).get(); // TODO: will this link correctly with dest id3d?
// TODO: Can set pOutputSample as well so that just copy everything by value in lambda capture clause
FinishEval(pInputSample, raw, src, dest, hnsDuration, hnsTime, pun64MarkerID); // Trying to maintain the lifetime of the whole sample
});
}
if (model->m_outputVideoFrame != nullptr)
{
// TODO: I don't think this needs a lock bc each call to submit eval is specific to a frame
// Lock so that don't have multiple sources copying to output at once
// std::lock_guard<std::mutex> guard{ Processing };
model->m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
// TODO: Does this need to be locked?
FinishEval(pInputSample, pOutputSample, src, dest);
}
done:
return hr;
}
HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSample> pOutputSample,
IDirect3DSurface src, IDirect3DSurface dest)
// Try as const ref
HRESULT TransformAsync::FinishEval(winrt::com_ptr<IMFSample> pInputSample, IMFSample* pOutput,
IDirect3DSurface src, IDirect3DSurface dest, LONGLONG hnsDuration, LONGLONG hnsTime, UINT64 pun64MarkerID)
{
LONGLONG hnsDuration = 0;
LONGLONG hnsTime = 0;
UINT64 pun64MarkerID = 0;
HRESULT hr = S_OK;
winrt::com_ptr<IMFMediaBuffer> pMediaBuffer;
winrt::com_ptr<IMFMediaEvent> pHaveOutputEvent;
winrt::com_ptr<IMFSample> pOutputSample;
pOutputSample.copy_from(pOutput);
src.Close();
dest.Close();
// **** 3. Set up the output sample
// CHECK_HR(hr = DuplicateAttributes(pOutputSample.get(), pInputSample));
if (SUCCEEDED(pInputSample->GetSampleDuration(&hnsDuration)))
{
CHECK_HR(hr = pOutputSample->SetSampleDuration(hnsDuration));
}
if (SUCCEEDED(pInputSample->GetSampleTime(&hnsTime)))
{
CHECK_HR(hr = pOutputSample->SetSampleTime(hnsTime));
// todo: incrememt m_
}
CHECK_HR(hr = pOutputSample->SetSampleDuration(hnsDuration));
CHECK_HR(hr = pOutputSample->SetSampleTime(hnsTime));
// Always set the output buffer size!
CHECK_HR(hr = pOutputSample->GetBufferByIndex(0, pMediaBuffer.put()));
@ -322,7 +308,7 @@ HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSa
if(m_bFirstSample != FALSE)
{
// TODO: What if make not discontinuity?
// CHECK_HR(hr = pOutputSample->SetUINT32(MFSampleExtension_Discontinuity, TRUE));
CHECK_HR(hr = pOutputSample->SetUINT32(MFSampleExtension_Discontinuity, TRUE));
m_bFirstSample = FALSE;
}
@ -338,7 +324,7 @@ HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSa
m_dwStatus |= MYMFT_STATUS_OUTPUT_SAMPLE_READY;
}
if (pInputSample->GetUINT64(TransformAsync_MFSampleExtension_Marker, &pun64MarkerID) == S_OK)
if (pun64MarkerID)
{
// This input sample is flagged as a marker
winrt::com_ptr<IMFMediaEvent> pMarkerEvent;
@ -351,7 +337,7 @@ HRESULT TransformAsync::FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSa
CHECK_HR(hr = RequestSample(0));
done:
// TODO: Close pInput/pOutput
return hr;
}

Просмотреть файл

@ -222,8 +222,8 @@ public:
#pragma endregion IMFAsyncCallback
HRESULT SubmitEval(IMFSample* pInputSample);
HRESULT FinishEval(IMFSample* pInputSample, winrt::com_ptr<IMFSample> pOutputSample,
IDirect3DSurface src, IDirect3DSurface dest);
HRESULT FinishEval(winrt::com_ptr<IMFSample> pInputSample, IMFSample* pOutput,
IDirect3DSurface src, IDirect3DSurface dest, LONGLONG hnsDuration, LONGLONG hnsTime, UINT64 pun64MarkerID);
protected:
TransformAsync(HRESULT& hr);
@ -312,7 +312,7 @@ protected:
std::unique_ptr<IStreamModel> m_streamModel; // TODO: Keep a vector of stream models?
// TODO: Prob needs to be a vector so can dynamically allocate based on what numThreads ends up as.
std::vector<std::unique_ptr<IStreamModel>> m_models;
int m_numThreads =3;
int m_numThreads =2;
int finishedFrameIndex = 0;
std::mutex Processing;