'spacing'
Parent: aa585036cf
Commit: 62e6380dbb

@@ -9,32 +9,32 @@ using namespace winrt;
using namespace Windows::Foundation::Collections;

enum OnnxDataType : long {
	ONNX_UNDEFINED = 0,

	// Basic types.
	ONNX_FLOAT = 1,
	ONNX_UINT8 = 2,
	ONNX_INT8 = 3,
	ONNX_UINT16 = 4,
	ONNX_INT16 = 5,
	ONNX_INT32 = 6,
	ONNX_INT64 = 7,
	ONNX_STRING = 8,
	ONNX_BOOL = 9,

	// IEEE754 half-precision floating-point format (16 bits wide).
	// This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
	ONNX_FLOAT16 = 10,

	ONNX_DOUBLE = 11,
	ONNX_UINT32 = 12,
	ONNX_UINT64 = 13,
	ONNX_COMPLEX64 = 14,  // complex with float32 real and imaginary components
	ONNX_COMPLEX128 = 15, // complex with float64 real and imaginary components

	// Non-IEEE floating-point format based on IEEE754 single-precision
	// floating-point number truncated to 16 bits.
	// This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
	ONNX_BFLOAT16 = 16,
} OnnxDataType;
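// These values mirror onnx::TensorProto_DataType, which is why they can be passed
// directly as the "to" attribute of a Cast operator (see ReshapeFlatBufferToNCHW below).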
@@ -43,226 +43,226 @@ const int32_t opset = 12;
/**** Style transfer model ****/
void StyleTransfer::InitializeSession(int w, int h)
{
	SetImageSize(720, 720); // Model input sizes fixed to 720x720.
	m_session = CreateLearningModelSession(GetModel());
	m_binding = LearningModelBinding(m_session);
}

void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest)
{
	m_syncStarted = true;

	VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
	VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
	SetVideoFrames(inVideoFrame, outVideoFrame);

	hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
	m_binding.Bind(inputName, m_inputVideoFrame);
	hstring outputName = m_session.Model().OutputFeatures().GetAt(0).Name();

	auto outputBindProperties = PropertySet();
	outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
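	// DisableTensorCpuSync keeps the bound output resource on the GPU after Evaluate,
	// deferring the readback until the explicit CopyToAsync below.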
	m_binding.Bind(outputName, m_outputVideoFrame, outputBindProperties);
	auto results = m_session.Evaluate(m_binding, L"");

	m_outputVideoFrame.CopyToAsync(outVideoFrame).get();

	m_syncStarted = false;
}

LearningModel StyleTransfer::GetModel()
{
	auto modelPath = std::filesystem::path(m_modelBasePath.c_str());
	modelPath.append("mosaic.onnx");
	return LearningModel::LoadFromFilePath(modelPath.c_str());
}

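// BackgroundBlur fuses three models into one graph before creating the session:
// preprocessing (Normalize0_1ThenZScore) -> FCN_ (fcn-resnet50-12) -> Post_ (PostProcess),
// so a single Evaluate call runs the whole pipeline.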
void BackgroundBlur::InitializeSession(int w, int h)
{
	w /= m_scale; h /= m_scale;
	SetImageSize(w, h);

	auto joinOptions1 = LearningModelJoinOptions();
	joinOptions1.CloseModelOnJoin(true);
	joinOptions1.Link(L"Output", L"input");
	joinOptions1.JoinedNodePrefix(L"FCN_");
	joinOptions1.PromoteUnlinkedOutputsToFusedOutputs(true);
	auto modelExperimental1 = LearningModelExperimental(Normalize0_1ThenZScore(h, w, 3, m_mean, m_stddev));
	LearningModel intermediateModel = modelExperimental1.JoinModel(GetModel(), joinOptions1);
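	// intermediateModel is now (normalize -> FCN_*); the second join below wires the
	// FCN_out scores and the forwarded original image into the post-processing graph.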

	auto joinOptions2 = LearningModelJoinOptions();
	joinOptions2.CloseModelOnJoin(true);
	joinOptions2.Link(L"FCN_out", L"InputScores");
	joinOptions2.Link(L"OutputImageForward", L"InputImage");
	joinOptions2.JoinedNodePrefix(L"Post_");
	//joinOptions2.PromoteUnlinkedOutputsToFusedOutputs(false); // TODO: Causes winrt originate error in FusedGraphKernel.cpp, but works on CPU
	auto modelExperimental2 = LearningModelExperimental(intermediateModel);
	LearningModel modelFused = modelExperimental2.JoinModel(PostProcess(1, 3, h, w, 1), joinOptions2);

	// Save the model for debugging purposes
	//modelExperimental2.Save(L"modelFused.onnx");

	m_session = CreateLearningModelSession(modelFused);
	m_binding = LearningModelBinding(m_session);
}
LearningModel BackgroundBlur::GetModel()
{
	auto model_path = std::filesystem::path(m_modelBasePath.c_str());
	model_path.append("fcn-resnet50-12.onnx");
	return LearningModel::LoadFromFilePath(model_path.c_str());
}

void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
{
	m_syncStarted = true;

	VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
	VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
	SetVideoFrames(inVideoFrame, outVideoFrame);

	// Shape validation
	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
	assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);

	hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
	hstring outputName = m_session.Model().OutputFeatures().GetAt(1).Name();
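	// Output index 1, not 0: the first join promoted the unlinked FCN outputs onto the
	// fused model, so it exposes several outputs; index 1 is bound to the output frame here.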

	m_binding.Bind(inputName, m_inputVideoFrame);
	m_binding.Bind(outputName, m_outputVideoFrame);
	auto results = m_session.Evaluate(m_binding, L"");
	m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
	m_syncStarted = false;
}

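// PostProcess builds the blur compositing graph with LearningModelBuilder:
// ArgMax over the FCN class scores picks a class id per pixel (class 0 is background),
// Clip(0, 1) collapses the ids into a binary foreground mask, and the output is
// ForegroundImage plus the AveragePool-blurred image weighted by (1 - mask).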
LearningModel BackgroundBlur::PostProcess(long n, long c, long h, long w, long axis)
{
	auto builder = LearningModelBuilder::Create(opset)
		.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputImage", TensorKind::Float, { n, c, h, w }))
		.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputScores", TensorKind::Float, { -1, -1, h, w }))
		.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImage", TensorKind::Float, { n, c, h, w }))
		// Argmax Model Outputs
		.Operators().Add(LearningModelOperator(L"ArgMax")
			.SetInput(L"data", L"InputScores")
			.SetAttribute(L"keepdims", TensorInt64Bit::CreateFromArray({ 1 }, { 1 }))
			.SetAttribute(L"axis", TensorInt64Bit::CreateFromIterable({ 1 }, { axis }))
			.SetOutput(L"reduced", L"Reduced"))
		.Operators().Add(LearningModelOperator(L"Cast")
			.SetInput(L"input", L"Reduced")
			.SetAttribute(L"to", TensorInt64Bit::CreateFromIterable({}, { OnnxDataType::ONNX_FLOAT }))
			.SetOutput(L"output", L"ArgmaxOutput"))
		// Extract the foreground using the argmax scores to create a mask
		.Operators().Add(LearningModelOperator(L"Clip")
			.SetInput(L"input", L"ArgmaxOutput")
			.SetConstant(L"min", TensorFloat::CreateFromIterable({ 1 }, { 0.f }))
			.SetConstant(L"max", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
			.SetOutput(L"output", L"MaskBinary"))
		.Operators().Add(LearningModelOperator(L"Mul")
			.SetInput(L"A", L"InputImage")
			.SetInput(L"B", L"MaskBinary")
			.SetOutput(L"C", L"ForegroundImage"))

		// Extract the blurred background using the negation of the foreground mask
		.Operators().Add(LearningModelOperator(L"AveragePool") // AveragePool to create blurred background
			.SetInput(L"X", L"InputImage")
			.SetAttribute(L"kernel_shape", TensorInt64Bit::CreateFromArray(std::vector<int64_t>{2}, std::array<int64_t, 2>{20, 20}))
			.SetAttribute(L"auto_pad", TensorString::CreateFromArray(std::vector<int64_t>{1}, std::array<hstring, 1>{L"SAME_UPPER"}))
			.SetOutput(L"Y", L"BlurredImage"))
		.Operators().Add(LearningModelOperator(L"Mul")
			.SetInput(L"A", L"MaskBinary")
			.SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
			.SetOutput(L"C", L"NegMask"))
		.Operators().Add(LearningModelOperator(L"Add") // BackgroundMask = (1 - foreground mask)
			.SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
			.SetInput(L"B", L"NegMask")
			.SetOutput(L"C", L"BackgroundMask"))
		.Operators().Add(LearningModelOperator(L"Mul") // Extract the blurred background
			.SetInput(L"A", L"BlurredImage")
			.SetInput(L"B", L"BackgroundMask")
			.SetOutput(L"C", L"BackgroundImage"))

		// Combine foreground and background
		.Operators().Add(LearningModelOperator(L"Add")
			.SetInput(L"A", L"ForegroundImage")
			.SetInput(L"B", L"BackgroundImage")
			.SetOutput(L"C", L"OutputImage"));

	return builder.CreateModel();
}

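// Invert computes (255 - x) per pixel: multiply by -1, then add 255.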
LearningModel Invert(long n, long c, long h, long w)
{
	auto builder = LearningModelBuilder::Create(opset)
		// Loading in buffers and reshape
		.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, { n, c, h, w }))
		.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
		.Operators().Add(LearningModelOperator(L"Mul")
			.SetInput(L"A", L"Input")
			.SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
			.SetOutput(L"C", L"MulOutput"))
		.Operators().Add(LearningModelOperator(L"Add")
			.SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 255.f }))
			.SetInput(L"B", L"MulOutput")
			.SetOutput(L"C", L"Output"));

	return builder.CreateModel();
}

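// Standard torchvision-style preprocessing: scale 0-255 input to 0-1, then apply
// per-channel z-score normalization ((x - mean) / stddev). The Identity operator
// forwards the original image as a second output so PostProcess can recombine it later.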
LearningModel Normalize0_1ThenZScore(long h, long w, long c, const std::array<float, 3>& means, const std::array<float, 3>& stddev)
{
	assert(means.size() == c);
	assert(stddev.size() == c);

	auto builder = LearningModelBuilder::Create(opset)
		.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NCHW image", TensorKind::Float, { 1, c, h, w }))
		.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", L"The NCHW image normalized with mean and stddev.", TensorKind::Float, { 1, c, h, w }))
		.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImageForward", L"The NCHW image forwarded through the model.", TensorKind::Float, { 1, c, h, w }))
		.Operators().Add(LearningModelOperator(L"Div") // Normalize from 0-255 to 0-1 by dividing by 255
			.SetInput(L"A", L"Input")
			.SetConstant(L"B", TensorFloat::CreateFromArray({}, { 255.f }))
			.SetOutput(L"C", L"DivOutput"))
		.Operators().Add(LearningModelOperator(L"Reshape")
			.SetConstant(L"data", TensorFloat::CreateFromArray({ c }, means))
			.SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
			.SetOutput(L"reshaped", L"MeansReshaped"))
		.Operators().Add(LearningModelOperator(L"Reshape")
			.SetConstant(L"data", TensorFloat::CreateFromArray({ c }, stddev))
			.SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
			.SetOutput(L"reshaped", L"StdDevReshaped"))
		.Operators().Add(LearningModelOperator(L"Sub") // Shift by the means
			.SetInput(L"A", L"DivOutput")
			.SetInput(L"B", L"MeansReshaped")
			.SetOutput(L"C", L"SubOutput"))
		.Operators().Add(LearningModelOperator(L"Div") // Divide by stddev
			.SetInput(L"A", L"SubOutput")
			.SetInput(L"B", L"StdDevReshaped")
			.SetOutput(L"C", L"Output"))
		.Operators().Add(LearningModelOperator(L"Identity")
			.SetInput(L"input", L"Input")
			.SetOutput(L"output", L"OutputImageForward"));
	return builder.CreateModel();
}

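// Converts a flat UInt8 pixel buffer into a float NCHW tensor:
// Cast uint8 -> float, Reshape the flat buffer to NHWC, then Transpose (0, 3, 1, 2) to NCHW.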
LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w)
{
	auto builder = LearningModelBuilder::Create(opset)
		// Loading in buffers and reshape
		.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::UInt8, { 1, n * c * h * w }))
		.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
		.Operators().Add(LearningModelOperator(L"Cast")
			.SetInput(L"input", L"Input")
			.SetOutput(L"output", L"CastOutput")
			.SetAttribute(L"to",
				TensorInt64Bit::CreateFromIterable({}, { OnnxDataType::ONNX_FLOAT })))
		.Operators().Add(LearningModelOperator(L"Reshape")
			.SetInput(L"data", L"CastOutput")
			.SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { n, h, w, c }))
			.SetOutput(L"reshaped", L"ReshapeOutput"))
		.Operators().Add(LearningModelOperator(L"Transpose")
			.SetInput(L"data", L"ReshapeOutput")
			.SetAttribute(L"perm", TensorInt64Bit::CreateFromArray({ 4 }, { 0, 3, 1, 2 }))
			.SetOutput(L"transposed", L"Output"));
	return builder.CreateModel();
}

@@ -31,107 +31,109 @@ LearningModel Invert(long n, long c, long h, long w);
class StreamModelBase
{
public:
	StreamModelBase() :
		m_inputVideoFrame(nullptr),
		m_outputVideoFrame(nullptr),
		m_session(nullptr),
		m_binding(nullptr),
		m_syncStarted(false) {}

	virtual ~StreamModelBase() {
		if (m_session) m_session.Close();
		if (m_binding) m_binding.Clear();
		if (m_inputVideoFrame) m_inputVideoFrame.Close();
		if (m_outputVideoFrame) m_outputVideoFrame.Close();
	};

	virtual void InitializeSession(int w, int h) = 0;
	virtual void Run(IDirect3DSurface src, IDirect3DSurface dest) = 0;

	// Synchronous eval status
	bool m_syncStarted = false;
	VideoFrame m_outputVideoFrame;
	static const int m_scale = 4;
	winrt::hstring m_modelBasePath;

protected:
	// Cache input frames into a shareable d3d-backed VideoFrame
	void SetVideoFrames(VideoFrame inVideoFrame, VideoFrame outVideoFrame)
	{
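		// NOTE: "true ||" short-circuits the check, so the surfaces are reallocated on
		// every call; m_videoFramesSet is effectively unused until this is removed.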
		if (true || !m_videoFramesSet)
		{
			auto device = m_session.Device().Direct3D11Device();
			auto inDesc = inVideoFrame.Direct3DSurface().Description();
			auto outDesc = outVideoFrame.Direct3DSurface().Description();
			/*
				NOTE: VideoFrame::CreateAsDirect3D11SurfaceBacked takes arguments in (width, height) order,
				whereas every model created with LearningModelBuilder takes arguments in (height, width) order.
			*/
			auto format = winrt::Windows::Graphics::DirectX::DirectXPixelFormat::B8G8R8X8UIntNormalized;
			m_inputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
			m_outputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
			m_videoFramesSet = true;
		}
		// NOTE: WinML supports mainly RGB-formatted video frames, which aren't backed by a shareable surface by the Capture Engine.
		// Copying to a new VideoFrame makes it shareable for use in inference.
		inVideoFrame.CopyToAsync(m_inputVideoFrame).get();
		outVideoFrame.CopyToAsync(m_outputVideoFrame).get();
	}

-	void SetImageSize(int w, int h) {
+	void SetImageSize(int w, int h)
+	{
		m_imageWidthInPixels = w;
		m_imageHeightInPixels = h;
	}

-	LearningModelSession CreateLearningModelSession(const LearningModel& model, bool closedModel = true) {
+	LearningModelSession CreateLearningModelSession(const LearningModel& model, bool closedModel = true)
+	{
		auto device = LearningModelDevice(m_useGPU ? LearningModelDeviceKind::DirectXHighPerformance : LearningModelDeviceKind::Default);
		auto options = LearningModelSessionOptions();
		options.BatchSizeOverride(0);
		options.CloseModelOnSessionCreation(closedModel);
		auto session = LearningModelSession(model, device, options);
		return session;
	}

	bool m_useGPU = true;
	bool m_videoFramesSet = false;
	VideoFrame m_inputVideoFrame;

	UINT32 m_imageWidthInPixels = 0;
	UINT32 m_imageHeightInPixels = 0;

	// Learning Model Binding and Session.
	LearningModelSession m_session;
	LearningModelBinding m_binding;
};


class StyleTransfer : public StreamModelBase
{
public:
	StyleTransfer() : StreamModelBase() {};
	void InitializeSession(int w, int h);
	void Run(IDirect3DSurface src, IDirect3DSurface dest);
private:
	LearningModel GetModel();
};


class BackgroundBlur : public StreamModelBase
{
public:
	BackgroundBlur() :
		StreamModelBase()
	{};
	void InitializeSession(int w, int h);
	void Run(IDirect3DSurface src, IDirect3DSurface dest);

private:
	LearningModel GetModel();
	LearningModel PostProcess(long n, long c, long h, long w, long axis);

	// Mean and standard deviation for z-score normalization during preprocessing.
	std::array<float, 3> m_mean = { 0.485f, 0.456f, 0.406f };
	std::array<float, 3> m_stddev = { 0.229f, 0.224f, 0.225f };
};
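
A minimal usage sketch, not part of this commit: `capture` and its methods below are hypothetical stand-ins for the app's frame source. Each model is initialized once with the stream dimensions, then Run is called synchronously per frame with D3D surfaces.

// Hypothetical driver, assuming a pipeline that yields shareable IDirect3DSurface frames.
BackgroundBlur model;
model.m_modelBasePath = L"C:\\models\\";  // directory containing fcn-resnet50-12.onnx
model.InitializeSession(1280, 720);       // stream size; blur runs at 1/m_scale resolution
while (capture.HasFrames())               // 'capture' is a stand-in, not a real API
{
	IDirect3DSurface src = capture.NextFrame();
	IDirect3DSurface dst = capture.OutputSurface();
	model.Run(src, dst);                  // binds, evaluates, and copies the result into dst
}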