Use the non-quantized fcn-resnet model; utilization goes way up
This commit is contained in:
Родитель
9a53abdf3c
Коммит
c59da15362
|
@ -290,6 +290,4 @@ __pycache__/
|
|||
|
||||
# UWP Generated files
|
||||
**/Generated Files/
|
||||
/Samples/StyleTransfer/Assets
|
||||
/Samples/BackgroundBlur/BackgroundBlur/AsyncMFTWrapper.h
|
||||
/Samples/BackgroundBlur/BackgroundBlur/AsyncMFTWrapper.cpp
|
||||
/Samples/BackgroundBlur/BackgroundBlur/Assets
|
||||
|
|
|
@ -189,6 +189,11 @@
|
|||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
|
||||
<DestinationFolders Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)/Assets</DestinationFolders>
|
||||
</CopyFileToFolders>
|
||||
<CopyFileToFolders Include="Assets\fcn-resnet50-11.onnx">
|
||||
<DeploymentContent>true</DeploymentContent>
|
||||
<FileType>Document</FileType>
|
||||
<DestinationFolders Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)/Assets</DestinationFolders>
|
||||
</CopyFileToFolders>
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
|
|
@ -36,5 +36,6 @@
|
|||
<ItemGroup>
|
||||
<CopyFileToFolders Include="Assets\mosaic.onnx" />
|
||||
<CopyFileToFolders Include="Assets\fcn-resnet50-12-int8.onnx" />
|
||||
<CopyFileToFolders Include="Assets\fcn-resnet50-11.onnx" />
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -38,16 +38,7 @@ enum OnnxDataType : long {
|
|||
ONNX_BFLOAT16 = 16,
|
||||
}OnnxDataType;
|
||||
|
||||
interface DECLSPEC_UUID("9f251514-9d4d-4902-9d60-18988ab7d4b5") DECLSPEC_NOVTABLE
|
||||
IDXGraphicsAnalysis : public IUnknown
|
||||
{
|
||||
|
||||
STDMETHOD_(void, BeginCapture)() PURE;
|
||||
|
||||
STDMETHOD_(void, EndCapture)() PURE;
|
||||
|
||||
};
|
||||
IDXGraphicsAnalysis* pGraphicsAnalysis;
|
||||
|
||||
|
||||
// TODO: These probably don't need to be globals
|
||||
|
@ -95,8 +86,6 @@ void BackgroundBlur::SetModels(int w, int h)
|
|||
w /= g_scale; h /= g_scale;
|
||||
SetImageSize(w, h);
|
||||
|
||||
HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast<void**>(&pGraphicsAnalysis));
|
||||
|
||||
m_sessionPreprocess = CreateLearningModelSession(Normalize0_1ThenZScore(h, w, 3, mean, stddev));
|
||||
m_sessionPostprocess = CreateLearningModelSession(PostProcess(1, 3, h, w, 1));
|
||||
// Named dimension override of FCN-ResNet to unlock optimizations for a fixed input size
|
||||
|
@ -118,12 +107,11 @@ void BackgroundBlur::SetModels(int w, int h)
|
|||
LearningModel BackgroundBlur::GetModel()
|
||||
{
|
||||
auto rel = std::filesystem::current_path();
|
||||
rel.append("Assets\\fcn-resnet50-12-int8.onnx");
|
||||
rel.append("Assets\\fcn-resnet50-11.onnx");
|
||||
return LearningModel::LoadFromFilePath(rel + L"");
|
||||
}
|
||||
void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
|
||||
{
|
||||
pGraphicsAnalysis->BeginCapture();
|
||||
assert(m_session.Device().AdapterId() == nvidia);
|
||||
VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
|
||||
VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
|
||||
|
@ -167,7 +155,6 @@ void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
|
|||
// TODO: Make this async as well, and add a completed
|
||||
m_sessionPostprocess.EvaluateAsync(m_bindingPostprocess, L"").get();
|
||||
m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
|
||||
pGraphicsAnalysis->EndCapture();
|
||||
}
|
||||
|
||||
winrt::Windows::Foundation::IAsyncOperation<LearningModelEvaluationResult> BackgroundBlur::RunAsync()
|
||||
|
|
|
@ -17,9 +17,7 @@
|
|||
#include <winrt/Windows.Media.h>
|
||||
//#include <DXProgrammableCapture.h>
|
||||
#include "common.h"
|
||||
#include <DXGItype.h>
|
||||
#include <dxgi1_2.h>
|
||||
#include <dxgi1_3.h>
|
||||
|
||||
|
||||
using namespace winrt::Microsoft::AI::MachineLearning;
|
||||
using namespace winrt::Microsoft::AI::MachineLearning::Experimental;
|
||||
|
|
|
@ -310,7 +310,7 @@ protected:
|
|||
std::unique_ptr<IStreamModel> m_streamModel; // TODO: Keep a vector of stream models?
|
||||
// TODO: Probably needs to be a vector so it can be dynamically allocated based on what numThreads ends up as.
|
||||
std::vector<std::unique_ptr<IStreamModel>> m_models;
|
||||
int m_numThreads =2;
|
||||
int m_numThreads =5;
|
||||
|
||||
// Pseudocode
|
||||
// int numThreads; needs to be configured by constructor
|
||||
|
|
|
@ -4,6 +4,21 @@
|
|||
#include <Mfapi.h>
|
||||
#include "common/CHWMFT_DebugLogger.h"
|
||||
|
||||
#include <DXGItype.h>
|
||||
#include <dxgi1_2.h>
|
||||
#include <dxgi1_3.h>
|
||||
|
||||
|
||||
interface DECLSPEC_UUID("9f251514-9d4d-4902-9d60-18988ab7d4b5") DECLSPEC_NOVTABLE
|
||||
IDXGraphicsAnalysis : public IUnknown
|
||||
{
|
||||
|
||||
STDMETHOD_(void, BeginCapture)() PURE;
|
||||
|
||||
STDMETHOD_(void, EndCapture)() PURE;
|
||||
|
||||
};
|
||||
IDXGraphicsAnalysis* pGraphicsAnalysis;
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
// Name: GetStreamLimits
|
||||
|
@ -683,8 +698,11 @@ HRESULT TransformAsync::ProcessMessage(
|
|||
|
||||
// TODO: Old messages
|
||||
case MFT_MESSAGE_NOTIFY_BEGIN_STREAMING:
|
||||
{
|
||||
HRESULT getAnalysis = DXGIGetDebugInterface1(0, __uuidof(pGraphicsAnalysis), reinterpret_cast<void**>(&pGraphicsAnalysis));
|
||||
SetupAlloc();
|
||||
break;
|
||||
}
|
||||
case MFT_MESSAGE_NOTIFY_END_STREAMING:
|
||||
default:
|
||||
break;
|
||||
|
@ -808,6 +826,7 @@ HRESULT TransformAsync::ProcessOutput(
|
|||
}
|
||||
}
|
||||
done:
|
||||
//pGraphicsAnalysis->EndCapture();
|
||||
return hr;
|
||||
}
|
||||
|
||||
|
@ -818,6 +837,7 @@ HRESULT TransformAsync::ProcessInput(
|
|||
{
|
||||
HRESULT hr = S_OK;
|
||||
{
|
||||
//pGraphicsAnalysis->BeginCapture();
|
||||
AutoLock lock(m_critSec);
|
||||
|
||||
if (m_dwNeedInputCount == 0)
|
||||
|
|
Загрузка…
Ссылка в новой задаче