This commit is contained in:
Linnea May 2022-08-22 18:13:39 -07:00
Parent aa585036cf
Commit 62e6380dbb
2 changed files with 271 additions and 269 deletions

View file

@@ -9,32 +9,32 @@ using namespace winrt;
using namespace Windows::Foundation::Collections;

// Mirrors the ONNX TensorProto_DataType values; used below as the Cast
// operator's "to" attribute.
enum OnnxDataType : long {
    ONNX_UNDEFINED = 0,
    // Basic types.
    ONNX_FLOAT = 1,
    ONNX_UINT8 = 2,
    ONNX_INT8 = 3,
    ONNX_UINT16 = 4,
    ONNX_INT16 = 5,
    ONNX_INT32 = 6,
    ONNX_INT64 = 7,
    ONNX_STRING = 8,
    ONNX_BOOL = 9,
    // IEEE754 half-precision floating-point format (16 bits wide).
    // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
    ONNX_FLOAT16 = 10,
    ONNX_DOUBLE = 11,
    ONNX_UINT32 = 12,
    ONNX_UINT64 = 13,
    ONNX_COMPLEX64 = 14,   // complex with float32 real and imaginary components
    ONNX_COMPLEX128 = 15,  // complex with float64 real and imaginary components
    // Non-IEEE floating-point format based on IEEE754 single-precision
    // floating-point number truncated to 16 bits.
    // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
    ONNX_BFLOAT16 = 16,
} OnnxDataType;
@@ -43,226 +43,226 @@ const int32_t opset = 12;

/**** Style transfer model ****/
void StyleTransfer::InitializeSession(int w, int h)
{
    SetImageSize(720, 720); // Model input sizes are fixed to 720x720.
    m_session = CreateLearningModelSession(GetModel());
    m_binding = LearningModelBinding(m_session);
}

void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest)
{
    m_syncStarted = true;

    VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
    VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
    SetVideoFrames(inVideoFrame, outVideoFrame);

    hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
    m_binding.Bind(inputName, m_inputVideoFrame);

    hstring outputName = m_session.Model().OutputFeatures().GetAt(0).Name();
    auto outputBindProperties = PropertySet();
    // Keep the output tensor on the GPU rather than syncing it back to the CPU.
    outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
    m_binding.Bind(outputName, m_outputVideoFrame, outputBindProperties);

    auto results = m_session.Evaluate(m_binding, L"");
    m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
    m_syncStarted = false;
}

LearningModel StyleTransfer::GetModel()
{
    auto modelPath = std::filesystem::path(m_modelBasePath.c_str());
    modelPath.append("mosaic.onnx");
    return LearningModel::LoadFromFilePath(modelPath.c_str());
}
void BackgroundBlur::InitializeSession(int w, int h)
{
    // Run segmentation at a fraction of the stream resolution (m_scale = 4).
    w /= m_scale; h /= m_scale;
    SetImageSize(w, h);

    // Join 1: chain the normalization model in front of FCN-ResNet50.
    auto joinOptions1 = LearningModelJoinOptions();
    joinOptions1.CloseModelOnJoin(true);
    joinOptions1.Link(L"Output", L"input");
    joinOptions1.JoinedNodePrefix(L"FCN_");
    joinOptions1.PromoteUnlinkedOutputsToFusedOutputs(true);
    auto modelExperimental1 = LearningModelExperimental(Normalize0_1ThenZScore(h, w, 3, m_mean, m_stddev));
    LearningModel intermediateModel = modelExperimental1.JoinModel(GetModel(), joinOptions1);

    // Join 2: chain the post-processing (mask + composite) model behind it.
    auto joinOptions2 = LearningModelJoinOptions();
    joinOptions2.CloseModelOnJoin(true);
    joinOptions2.Link(L"FCN_out", L"InputScores");
    joinOptions2.Link(L"OutputImageForward", L"InputImage");
    joinOptions2.JoinedNodePrefix(L"Post_");
    //joinOptions2.PromoteUnlinkedOutputsToFusedOutputs(false); // TODO: Causes winrt originate error in FusedGraphKernel.cpp, but works on CPU
    auto modelExperimental2 = LearningModelExperimental(intermediateModel);
    LearningModel modelFused = modelExperimental2.JoinModel(PostProcess(1, 3, h, w, 1), joinOptions2);

    // Save the model for debugging purposes
    //modelExperimental2.Save(L"modelFused.onnx");

    m_session = CreateLearningModelSession(modelFused);
    m_binding = LearningModelBinding(m_session);
}
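The two joins above fuse preprocessing, segmentation, and compositing into one model, so a single Evaluate call runs the whole pipeline on the GPU. A dataflow sketch of the fused graph (a reading aid, using the names from the code above; exact fused feature names are not asserted here):

//   camera frame (1 x 3 x h x w, values 0-255)
//        |
//   Normalize0_1ThenZScore --- "OutputImageForward" (raw image) ---+
//        | "Output"                                                |
//   fcn-resnet50-12.onnx, joined with prefix "FCN_"                |
//        | "FCN_out" (per-class scores)                            |
//   PostProcess ("Post_"): mask and composite  <-------------------+
//        |
//   "OutputImage" = sharp foreground + blurred background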
LearningModel BackgroundBlur::GetModel()
{
    auto model_path = std::filesystem::path(m_modelBasePath.c_str());
    model_path.append("fcn-resnet50-12.onnx");
    return LearningModel::LoadFromFilePath(model_path.c_str());
}
void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest)
{
    m_syncStarted = true;

    VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
    VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
    SetVideoFrames(inVideoFrame, outVideoFrame);

    // Shape validation
    assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
    assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);

    hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
    hstring outputName = m_session.Model().OutputFeatures().GetAt(1).Name();
    m_binding.Bind(inputName, m_inputVideoFrame);
    m_binding.Bind(outputName, m_outputVideoFrame);
    auto results = m_session.Evaluate(m_binding, L"");

    m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
    m_syncStarted = false;
}
LearningModel BackgroundBlur::PostProcess(long n, long c, long h, long w, long axis)
{
    auto builder = LearningModelBuilder::Create(opset)
        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputImage", TensorKind::Float, { n, c, h, w }))
        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputScores", TensorKind::Float, { -1, -1, h, w }))
        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImage", TensorKind::Float, { n, c, h, w }))
        // ArgMax over the model's output scores
        .Operators().Add(LearningModelOperator(L"ArgMax")
            .SetInput(L"data", L"InputScores")
            .SetAttribute(L"keepdims", TensorInt64Bit::CreateFromArray({ 1 }, { 1 }))
            .SetAttribute(L"axis", TensorInt64Bit::CreateFromIterable({ 1 }, { axis }))
            .SetOutput(L"reduced", L"Reduced"))
        .Operators().Add(LearningModelOperator(L"Cast")
            .SetInput(L"input", L"Reduced")
            .SetAttribute(L"to", TensorInt64Bit::CreateFromIterable({}, { OnnxDataType::ONNX_FLOAT }))
            .SetOutput(L"output", L"ArgmaxOutput"))
        // Extract the foreground using the argmax scores to create a mask
        .Operators().Add(LearningModelOperator(L"Clip")
            .SetInput(L"input", L"ArgmaxOutput")
            .SetConstant(L"min", TensorFloat::CreateFromIterable({ 1 }, { 0.f }))
            .SetConstant(L"max", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
            .SetOutput(L"output", L"MaskBinary"))
        .Operators().Add(LearningModelOperator(L"Mul")
            .SetInput(L"A", L"InputImage")
            .SetInput(L"B", L"MaskBinary")
            .SetOutput(L"C", L"ForegroundImage"))
        // Extract the blurred background using the negation of the foreground mask
        .Operators().Add(LearningModelOperator(L"AveragePool") // AveragePool to create the blurred background
            .SetInput(L"X", L"InputImage")
            .SetAttribute(L"kernel_shape", TensorInt64Bit::CreateFromArray(std::vector<int64_t>{2}, std::array<int64_t, 2>{20, 20}))
            .SetAttribute(L"auto_pad", TensorString::CreateFromArray(std::vector<int64_t>{1}, std::array<hstring, 1>{L"SAME_UPPER"}))
            .SetOutput(L"Y", L"BlurredImage"))
        .Operators().Add(LearningModelOperator(L"Mul")
            .SetInput(L"A", L"MaskBinary")
            .SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
            .SetOutput(L"C", L"NegMask"))
        .Operators().Add(LearningModelOperator(L"Add") // BackgroundMask = (1 - foreground mask)
            .SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
            .SetInput(L"B", L"NegMask")
            .SetOutput(L"C", L"BackgroundMask"))
        .Operators().Add(LearningModelOperator(L"Mul") // Extract the blurred background
            .SetInput(L"A", L"BlurredImage")
            .SetInput(L"B", L"BackgroundMask")
            .SetOutput(L"C", L"BackgroundImage"))
        // Combine foreground and background
        .Operators().Add(LearningModelOperator(L"Add")
            .SetInput(L"A", L"ForegroundImage")
            .SetInput(L"B", L"BackgroundImage")
            .SetOutput(L"C", L"OutputImage"));
    return builder.CreateModel();
}
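Read as scalar arithmetic, PostProcess builds a hard foreground mask from the segmentation scores and composites the sharp foreground over the average-pooled background. A minimal sketch of the per-pixel effect, assuming class 0 of the FCN output is background (a reading aid, not code from the sample):

#include <algorithm>  // std::clamp
#include <cstdint>

// inPx: input pixel, blurPx: the AveragePool output at the same location,
// argmaxClass: ArgMax over the class axis (0 = background).
float CompositePixel(float inPx, float blurPx, std::int64_t argmaxClass)
{
    // Cast + Clip -> binary foreground mask
    float mask = std::clamp(static_cast<float>(argmaxClass), 0.f, 1.f);
    // Mul (foreground), Mul + Add (background), Add (composite)
    return inPx * mask + blurPx * (1.f - mask);
}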
LearningModel Invert(long n, long c, long h, long w)
{
    auto builder = LearningModelBuilder::Create(opset)
        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, { n, c, h, w }))
        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
        .Operators().Add(LearningModelOperator(L"Mul")
            .SetInput(L"A", L"Input")
            .SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
            .SetOutput(L"C", L"MulOutput"))
        .Operators().Add(LearningModelOperator(L"Add")
            .SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 255.f }))
            .SetInput(L"B", L"MulOutput")
            .SetOutput(L"C", L"Output"));
    return builder.CreateModel();
}
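Invert flips an image in the 0-255 range: Mul by -1 followed by Add 255 is just 255 minus the input. The equivalent per-element computation (a hypothetical helper, shown only to make the arithmetic explicit):

// Output = 255 - Input, built above as (Input * -1) + 255.
float InvertPixel(float v) { return 255.f - v; }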
LearningModel Normalize0_1ThenZScore(long h, long w, long c, const std::array<float, 3>& means, const std::array<float, 3>& stddev)
{
    assert(means.size() == c);
    assert(stddev.size() == c);
    auto builder = LearningModelBuilder::Create(opset)
        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NCHW image", TensorKind::Float, { 1, c, h, w }))
        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", L"The NCHW image normalized with mean and stddev.", TensorKind::Float, { 1, c, h, w }))
        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImageForward", L"The NCHW image forwarded through the model.", TensorKind::Float, { 1, c, h, w }))
        .Operators().Add(LearningModelOperator(L"Div") // Normalize from 0-255 to 0-1 by dividing by 255
            .SetInput(L"A", L"Input")
            .SetConstant(L"B", TensorFloat::CreateFromArray({}, { 255.f }))
            .SetOutput(L"C", L"DivOutput"))
        .Operators().Add(LearningModelOperator(L"Reshape")
            .SetConstant(L"data", TensorFloat::CreateFromArray({ c }, means))
            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
            .SetOutput(L"reshaped", L"MeansReshaped"))
        .Operators().Add(LearningModelOperator(L"Reshape")
            .SetConstant(L"data", TensorFloat::CreateFromArray({ c }, stddev))
            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
            .SetOutput(L"reshaped", L"StdDevReshaped"))
        .Operators().Add(LearningModelOperator(L"Sub") // Shift by the means
            .SetInput(L"A", L"DivOutput")
            .SetInput(L"B", L"MeansReshaped")
            .SetOutput(L"C", L"SubOutput"))
        .Operators().Add(LearningModelOperator(L"Div") // Divide by stddev
            .SetInput(L"A", L"SubOutput")
            .SetInput(L"B", L"StdDevReshaped")
            .SetOutput(L"C", L"Output"))
        .Operators().Add(LearningModelOperator(L"Identity") // Forward the raw input as a second output for the later composite
            .SetInput(L"input", L"Input")
            .SetOutput(L"output", L"OutputImageForward"));
    return builder.CreateModel();
}
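The two Reshape operators expand the per-channel constants to { 1, c, 1, 1 } so Sub and Div broadcast channel-wise, while Identity forwards the untouched input as "OutputImageForward". Per element, the "Output" feature reduces to the following (a sketch, assuming standard ONNX broadcasting):

// out[n][ch][y][x] = (in[n][ch][y][x] / 255.f - mean[ch]) / stddev[ch]
float NormalizePixel(float v, float mean, float stddev)
{
    return (v / 255.f - mean) / stddev; // Div, then Sub, then Div in the graph above
}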
LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w)
{
    auto builder = LearningModelBuilder::Create(opset)
        // Load the flat buffer, then cast, reshape, and transpose to NCHW
        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::UInt8, { 1, n * c * h * w }))
        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
        .Operators().Add(LearningModelOperator(L"Cast")
            .SetInput(L"input", L"Input")
            .SetOutput(L"output", L"CastOutput")
            .SetAttribute(L"to", TensorInt64Bit::CreateFromIterable({}, { OnnxDataType::ONNX_FLOAT })))
        .Operators().Add(LearningModelOperator(L"Reshape")
            .SetInput(L"data", L"CastOutput")
            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { n, h, w, c }))
            .SetOutput(L"reshaped", L"ReshapeOutput"))
        .Operators().Add(LearningModelOperator(L"Transpose")
            .SetInput(L"data", L"ReshapeOutput")
            .SetAttribute(L"perm", TensorInt64Bit::CreateFromArray({ 4 }, { 0, 3, 1, 2 }))
            .SetOutput(L"transposed", L"Output"));
    return builder.CreateModel();
}
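Cast widens the bytes to float, Reshape views the flat buffer as interleaved NHWC { n, h, w, c }, and Transpose with perm { 0, 3, 1, 2 } produces planar NCHW. The index mapping this implements, written out as a plain helper (a sketch, assuming a fully packed row-major buffer):

#include <cstddef>

// Flat-buffer offset of element (n, ch, y, x) in the NCHW output;
// equivalently: nchw[n][ch][y][x] = float(flat[FlatNHWCIndex(...)]).
std::size_t FlatNHWCIndex(std::size_t n, std::size_t ch, std::size_t y, std::size_t x,
                          std::size_t h, std::size_t w, std::size_t c)
{
    return ((n * h + y) * w + x) * c + ch;
}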

View file

@@ -31,107 +31,109 @@ LearningModel Invert(long n, long c, long h, long w);

class StreamModelBase
{
public:
    StreamModelBase() :
        m_inputVideoFrame(nullptr),
        m_outputVideoFrame(nullptr),
        m_session(nullptr),
        m_binding(nullptr),
        m_syncStarted(false) {}

    virtual ~StreamModelBase() {
        if (m_session) m_session.Close();
        if (m_binding) m_binding.Clear();
        if (m_inputVideoFrame) m_inputVideoFrame.Close();
        if (m_outputVideoFrame) m_outputVideoFrame.Close();
    }

    virtual void InitializeSession(int w, int h) = 0;
    virtual void Run(IDirect3DSurface src, IDirect3DSurface dest) = 0;

    // Synchronous eval status
    bool m_syncStarted = false;
    VideoFrame m_outputVideoFrame;
    static const int m_scale = 4;
    winrt::hstring m_modelBasePath;

protected:
    // Cache input frames into a shareable d3d-backed VideoFrame
    void SetVideoFrames(VideoFrame inVideoFrame, VideoFrame outVideoFrame)
    {
        // NOTE: the 'true ||' short-circuit disables the m_videoFramesSet cache,
        // so the backing frames are recreated on every call.
        if (true || !m_videoFramesSet)
        {
            auto device = m_session.Device().Direct3D11Device();
            auto inDesc = inVideoFrame.Direct3DSurface().Description();
            auto outDesc = outVideoFrame.Direct3DSurface().Description();
            /*
                NOTE: VideoFrame::CreateAsDirect3D11SurfaceBacked takes arguments in (width, height) order,
                whereas every model created with LearningModelBuilder takes arguments in (height, width) order.
            */
            auto format = winrt::Windows::Graphics::DirectX::DirectXPixelFormat::B8G8R8X8UIntNormalized;
            m_inputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
            m_outputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
            m_videoFramesSet = true;
        }
        // NOTE: WinML supports mainly RGB-formatted video frames, which aren't backed by a shareable surface by the Capture Engine.
        // Copying to a new VideoFrame makes it shareable for use in inference.
        inVideoFrame.CopyToAsync(m_inputVideoFrame).get();
        outVideoFrame.CopyToAsync(m_outputVideoFrame).get();
    }
    void SetImageSize(int w, int h)
    {
        m_imageWidthInPixels = w;
        m_imageHeightInPixels = h;
    }

    LearningModelSession CreateLearningModelSession(const LearningModel& model, bool closedModel = true)
    {
        auto device = LearningModelDevice(m_useGPU ? LearningModelDeviceKind::DirectXHighPerformance : LearningModelDeviceKind::Default);
        auto options = LearningModelSessionOptions();
        options.BatchSizeOverride(0);
        options.CloseModelOnSessionCreation(closedModel);
        auto session = LearningModelSession(model, device, options);
        return session;
    }

    bool m_useGPU = true;
    bool m_videoFramesSet = false;
    VideoFrame m_inputVideoFrame;

    UINT32 m_imageWidthInPixels = 0;
    UINT32 m_imageHeightInPixels = 0;

    // Learning Model Binding and Session.
    LearningModelSession m_session;
    LearningModelBinding m_binding;
};

class StyleTransfer : public StreamModelBase
{
public:
    StyleTransfer() : StreamModelBase() {}
    void InitializeSession(int w, int h);
    void Run(IDirect3DSurface src, IDirect3DSurface dest);
private:
    LearningModel GetModel();
};

class BackgroundBlur : public StreamModelBase
{
public:
    BackgroundBlur() :
        StreamModelBase()
    {}
    void InitializeSession(int w, int h);
    void Run(IDirect3DSurface src, IDirect3DSurface dest);
private:
    LearningModel GetModel();
    LearningModel PostProcess(long n, long c, long h, long w, long axis);

    // Mean and standard deviation for z-score normalization during preprocessing.
    std::array<float, 3> m_mean = { 0.485f, 0.456f, 0.406f };
    std::array<float, 3> m_stddev = { 0.229f, 0.224f, 0.225f };
};
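For orientation, a caller would drive these classes roughly as follows. This driver is a hypothetical sketch: the model folder, the stream size, and the src/dest surfaces (supplied by the hosting media pipeline in the sample) are all assumptions.

#include <memory>

// Hypothetical driver: in practice InitializeSession runs once per stream
// and Run once per frame; they are shown together here for brevity.
void ApplyBackgroundBlur(IDirect3DSurface src, IDirect3DSurface dest)
{
    auto effect = std::make_unique<BackgroundBlur>();
    effect->m_modelBasePath = L"C:\\models";  // assumed folder holding fcn-resnet50-12.onnx
    effect->InitializeSession(1280, 720);     // stream width and height
    effect->Run(src, dest);                   // synchronous evaluation into 'dest'
}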