diff --git a/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.cpp b/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.cpp
index 075f825e..e5aa3754 100644
--- a/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.cpp
+++ b/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.cpp
@@ -9,32 +9,32 @@ using namespace winrt;
 using namespace Windows::Foundation::Collections;

 enum OnnxDataType : long {
-    ONNX_UNDEFINED = 0,
-    // Basic types.
-    ONNX_FLOAT = 1,
-    ONNX_UINT8 = 2,
-    ONNX_INT8 = 3,
-    ONNX_UINT16 = 4,
-    ONNX_INT16 = 5,
-    ONNX_INT32 = 6,
-    ONNX_INT64 = 7,
-    ONNX_STRING = 8,
-    ONNX_BOOL = 9,
+    ONNX_UNDEFINED = 0,
+    // Basic types.
+    ONNX_FLOAT = 1,
+    ONNX_UINT8 = 2,
+    ONNX_INT8 = 3,
+    ONNX_UINT16 = 4,
+    ONNX_INT16 = 5,
+    ONNX_INT32 = 6,
+    ONNX_INT64 = 7,
+    ONNX_STRING = 8,
+    ONNX_BOOL = 9,

-    // IEEE754 half-precision floating-point format (16 bits wide).
-    // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
-    ONNX_FLOAT16 = 10,
+    // IEEE754 half-precision floating-point format (16 bits wide).
+    // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
+    ONNX_FLOAT16 = 10,

-    ONNX_DOUBLE = 11,
-    ONNX_UINT32 = 12,
-    ONNX_UINT64 = 13,
-    ONNX_COMPLEX64 = 14, // complex with float32 real and imaginary components
-    ONNX_COMPLEX128 = 15, // complex with float64 real and imaginary components
+    ONNX_DOUBLE = 11,
+    ONNX_UINT32 = 12,
+    ONNX_UINT64 = 13,
+    ONNX_COMPLEX64 = 14, // complex with float32 real and imaginary components
+    ONNX_COMPLEX128 = 15, // complex with float64 real and imaginary components

-    // Non-IEEE floating-point format based on IEEE754 single-precision
-    // floating-point number truncated to 16 bits.
-    // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
-    ONNX_BFLOAT16 = 16,
+    // Non-IEEE floating-point format based on IEEE754 single-precision
+    // floating-point number truncated to 16 bits.
+    // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
+    ONNX_BFLOAT16 = 16,
 } OnnxDataType;

@@ -43,226 +43,226 @@ const int32_t opset = 12;

 /**** Style transfer model ****/
 void StyleTransfer::InitializeSession(int w, int h) {
-    SetImageSize(720, 720); // Model input sizes fixed to 720x720.
-    m_session = CreateLearningModelSession(GetModel());
-    m_binding = LearningModelBinding(m_session);
+    SetImageSize(720, 720); // Model input sizes fixed to 720x720.
+    m_session = CreateLearningModelSession(GetModel());
+    m_binding = LearningModelBinding(m_session);
 }
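Note: the pattern throughout this file is create-once, evaluate-per-frame — the session and binding are built a single time in InitializeSession, and Run() only rebinds frames and evaluates. A minimal sketch of the same flow, assuming the WinML using-declarations from this file and hypothetical inputFrame/outputFrame VideoFrames:

```cpp
// Sketch only (not part of the patch): expensive objects are created once...
LearningModel model = LearningModel::LoadFromFilePath(L"mosaic.onnx"); // hypothetical path
LearningModelDevice device(LearningModelDeviceKind::DirectXHighPerformance);
LearningModelSession session(model, device);
LearningModelBinding binding(session);

// ...then each frame is a cheap bind + synchronous evaluate.
binding.Bind(session.Model().InputFeatures().GetAt(0).Name(), inputFrame);
binding.Bind(session.Model().OutputFeatures().GetAt(0).Name(), outputFrame);
auto results = session.Evaluate(binding, L"");
```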

 void StyleTransfer::Run(IDirect3DSurface src, IDirect3DSurface dest) {
-    m_syncStarted = true;
+    m_syncStarted = true;

-    VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
-    VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
-    SetVideoFrames(inVideoFrame, outVideoFrame);
+    VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
+    VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
+    SetVideoFrames(inVideoFrame, outVideoFrame);

-    hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
-    m_binding.Bind(inputName, m_inputVideoFrame);
-    hstring outputName = m_session.Model().OutputFeatures().GetAt(0).Name();
+    hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
+    m_binding.Bind(inputName, m_inputVideoFrame);
+    hstring outputName = m_session.Model().OutputFeatures().GetAt(0).Name();

-    auto outputBindProperties = PropertySet();
-    outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));
+    auto outputBindProperties = PropertySet();
+    outputBindProperties.Insert(L"DisableTensorCpuSync", PropertyValue::CreateBoolean(true));

-    m_binding.Bind(outputName, m_outputVideoFrame, outputBindProperties);
-    auto results = m_session.Evaluate(m_binding, L"");
+    m_binding.Bind(outputName, m_outputVideoFrame, outputBindProperties);
+    auto results = m_session.Evaluate(m_binding, L"");

-    m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
+    m_outputVideoFrame.CopyToAsync(outVideoFrame).get();

-    m_syncStarted = false;
+    m_syncStarted = false;
 }

 LearningModel StyleTransfer::GetModel() {
-    auto modelPath = std::filesystem::path(m_modelBasePath.c_str());
-    modelPath.append("mosaic.onnx");
-    return LearningModel::LoadFromFilePath(modelPath.c_str());
+    auto modelPath = std::filesystem::path(m_modelBasePath.c_str());
+    modelPath.append("mosaic.onnx");
+    return LearningModel::LoadFromFilePath(modelPath.c_str());
 }

 void BackgroundBlur::InitializeSession(int w, int h) {
-    w /= m_scale; h /= m_scale;
-    SetImageSize(w, h);
+    w /= m_scale; h /= m_scale;
+    SetImageSize(w, h);

-    auto joinOptions1 = LearningModelJoinOptions();
-    joinOptions1.CloseModelOnJoin(true);
-    joinOptions1.Link(L"Output", L"input");
-    joinOptions1.JoinedNodePrefix(L"FCN_");
-    joinOptions1.PromoteUnlinkedOutputsToFusedOutputs(true);
-    auto modelExperimental1 = LearningModelExperimental(Normalize0_1ThenZScore(h, w, 3, m_mean, m_stddev));
-    LearningModel intermediateModel = modelExperimental1.JoinModel(GetModel(), joinOptions1);
+    auto joinOptions1 = LearningModelJoinOptions();
+    joinOptions1.CloseModelOnJoin(true);
+    joinOptions1.Link(L"Output", L"input");
+    joinOptions1.JoinedNodePrefix(L"FCN_");
+    joinOptions1.PromoteUnlinkedOutputsToFusedOutputs(true);
+    auto modelExperimental1 = LearningModelExperimental(Normalize0_1ThenZScore(h, w, 3, m_mean, m_stddev));
+    LearningModel intermediateModel = modelExperimental1.JoinModel(GetModel(), joinOptions1);

-    auto joinOptions2 = LearningModelJoinOptions();
-    joinOptions2.CloseModelOnJoin(true);
-    joinOptions2.Link(L"FCN_out", L"InputScores");
-    joinOptions2.Link(L"OutputImageForward", L"InputImage");
-    joinOptions2.JoinedNodePrefix(L"Post_");
-    //joinOptions2.PromoteUnlinkedOutputsToFusedOutputs(false); // TODO: Causes winrt originate error in FusedGraphKernel.cpp, but works on CPU
-    auto modelExperimental2 = LearningModelExperimental(intermediateModel);
-    LearningModel modelFused = modelExperimental2.JoinModel(PostProcess(1, 3, h, w, 1), joinOptions2);
+    auto joinOptions2 = LearningModelJoinOptions();
+    joinOptions2.CloseModelOnJoin(true);
+    joinOptions2.Link(L"FCN_out", L"InputScores");
+    joinOptions2.Link(L"OutputImageForward", L"InputImage");
+    joinOptions2.JoinedNodePrefix(L"Post_");
+    //joinOptions2.PromoteUnlinkedOutputsToFusedOutputs(false); // TODO: Causes winrt originate error in FusedGraphKernel.cpp, but works on CPU
+    auto modelExperimental2 = LearningModelExperimental(intermediateModel);
+    LearningModel modelFused = modelExperimental2.JoinModel(PostProcess(1, 3, h, w, 1), joinOptions2);

-    // Save the model for debugging purposes
-    //modelExperimental2.Save(L"modelFused.onnx");
+    // Save the model for debugging purposes
+    //modelExperimental2.Save(L"modelFused.onnx");

-    m_session = CreateLearningModelSession(modelFused);
-    m_binding = LearningModelBinding(m_session);
+    m_session = CreateLearningModelSession(modelFused);
+    m_binding = LearningModelBinding(m_session);
 }
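Note: the fused model is assembled in two joins. joinOptions1 links Normalize0_1ThenZScore's Output to the FCN's input (joined nodes prefixed FCN_), and PromoteUnlinkedOutputsToFusedOutputs(true) keeps the pass-through OutputImageForward visible on the intermediate model. joinOptions2 then links the segmentation scores (FCN_out) and the forwarded image into PostProcess (prefixed Post_), so a single session evaluates image -> normalize -> FCN-ResNet50 -> mask-and-blur compositing.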

 LearningModel BackgroundBlur::GetModel() {
-    auto model_path = std::filesystem::path(m_modelBasePath.c_str());
-    model_path.append("fcn-resnet50-12.onnx");
-    return LearningModel::LoadFromFilePath(model_path.c_str());
+    auto model_path = std::filesystem::path(m_modelBasePath.c_str());
+    model_path.append("fcn-resnet50-12.onnx");
+    return LearningModel::LoadFromFilePath(model_path.c_str());
 }

 void BackgroundBlur::Run(IDirect3DSurface src, IDirect3DSurface dest) {
-    m_syncStarted = true;
+    m_syncStarted = true;

-    VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
-    VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
-    SetVideoFrames(inVideoFrame, outVideoFrame);
+    VideoFrame inVideoFrame = VideoFrame::CreateWithDirect3D11Surface(src);
+    VideoFrame outVideoFrame = VideoFrame::CreateWithDirect3D11Surface(dest);
+    SetVideoFrames(inVideoFrame, outVideoFrame);

-    // Shape validation
-    assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
-    assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);
+    // Shape validation
+    assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Height == m_imageHeightInPixels);
+    assert((UINT32)m_inputVideoFrame.Direct3DSurface().Description().Width == m_imageWidthInPixels);

-    hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
-    hstring outputName = m_session.Model().OutputFeatures().GetAt(1).Name();
+    hstring inputName = m_session.Model().InputFeatures().GetAt(0).Name();
+    hstring outputName = m_session.Model().OutputFeatures().GetAt(1).Name();

-    m_binding.Bind(inputName, m_inputVideoFrame);
-    m_binding.Bind(outputName, m_outputVideoFrame);
-    auto results = m_session.Evaluate(m_binding, L"");
-    m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
-    m_syncStarted = false;
+    m_binding.Bind(inputName, m_inputVideoFrame);
+    m_binding.Bind(outputName, m_outputVideoFrame);
+    auto results = m_session.Evaluate(m_binding, L"");
+    m_outputVideoFrame.CopyToAsync(outVideoFrame).get();
+    m_syncStarted = false;
 }
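Note: the PostProcess graph built below computes OutputImage = InputImage * mask + AveragePool(InputImage) * (1 - mask), where mask = Clip(ArgMax(InputScores, axis), 0, 1). Class 0 is the background, so any foreground class index clips to 1.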

 LearningModel BackgroundBlur::PostProcess(long n, long c, long h, long w, long axis) {
-    auto builder = LearningModelBuilder::Create(opset)
-        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputImage", TensorKind::Float, { n, c, h, w }))
-        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputScores", TensorKind::Float, { -1, -1, h, w }))
-        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImage", TensorKind::Float, { n, c, h, w }))
-        // Argmax Model Outputs
-        .Operators().Add(LearningModelOperator(L"ArgMax")
-            .SetInput(L"data", L"InputScores")
-            .SetAttribute(L"keepdims", TensorInt64Bit::CreateFromArray({ 1 }, { 1 }))
-            .SetAttribute(L"axis", TensorInt64Bit::CreateFromIterable({ 1 }, { axis }))
-            .SetOutput(L"reduced", L"Reduced"))
-        .Operators().Add(LearningModelOperator(L"Cast")
-            .SetInput(L"input", L"Reduced")
-            .SetAttribute(L"to", TensorInt64Bit::CreateFromIterable({}, { OnnxDataType::ONNX_FLOAT }))
-            .SetOutput(L"output", L"ArgmaxOutput"))
-        // Extract the foreground using the argmax scores to create a mask
-        .Operators().Add(LearningModelOperator(L"Clip")
-            .SetInput(L"input", L"ArgmaxOutput")
-            .SetConstant(L"min", TensorFloat::CreateFromIterable({ 1 }, { 0.f }))
-            .SetConstant(L"max", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
-            .SetOutput(L"output", L"MaskBinary"))
-        .Operators().Add(LearningModelOperator(L"Mul")
-            .SetInput(L"A", L"InputImage")
-            .SetInput(L"B", L"MaskBinary")
-            .SetOutput(L"C", L"ForegroundImage"))
+    auto builder = LearningModelBuilder::Create(opset)
+        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputImage", TensorKind::Float, { n, c, h, w }))
+        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"InputScores", TensorKind::Float, { -1, -1, h, w }))
+        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImage", TensorKind::Float, { n, c, h, w }))
+        // Argmax Model Outputs
+        .Operators().Add(LearningModelOperator(L"ArgMax")
+            .SetInput(L"data", L"InputScores")
+            .SetAttribute(L"keepdims", TensorInt64Bit::CreateFromArray({ 1 }, { 1 }))
+            .SetAttribute(L"axis", TensorInt64Bit::CreateFromIterable({ 1 }, { axis }))
+            .SetOutput(L"reduced", L"Reduced"))
+        .Operators().Add(LearningModelOperator(L"Cast")
+            .SetInput(L"input", L"Reduced")
+            .SetAttribute(L"to", TensorInt64Bit::CreateFromIterable({}, { OnnxDataType::ONNX_FLOAT }))
+            .SetOutput(L"output", L"ArgmaxOutput"))
+        // Extract the foreground using the argmax scores to create a mask
+        .Operators().Add(LearningModelOperator(L"Clip")
+            .SetInput(L"input", L"ArgmaxOutput")
+            .SetConstant(L"min", TensorFloat::CreateFromIterable({ 1 }, { 0.f }))
+            .SetConstant(L"max", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
+            .SetOutput(L"output", L"MaskBinary"))
+        .Operators().Add(LearningModelOperator(L"Mul")
+            .SetInput(L"A", L"InputImage")
+            .SetInput(L"B", L"MaskBinary")
+            .SetOutput(L"C", L"ForegroundImage"))

-        // Extract the blurred background using the negation of the foreground mask
-        .Operators().Add(LearningModelOperator(L"AveragePool") // AveragePool to create blurred background
-            .SetInput(L"X", L"InputImage")
-            .SetAttribute(L"kernel_shape", TensorInt64Bit::CreateFromArray(std::vector<int64_t>{2}, std::array<int64_t, 2>{20, 20}))
-            .SetAttribute(L"auto_pad", TensorString::CreateFromArray(std::vector<int64_t>{1}, std::array<hstring, 1>{L"SAME_UPPER"}))
-            .SetOutput(L"Y", L"BlurredImage"))
-        .Operators().Add(LearningModelOperator(L"Mul")
-            .SetInput(L"A", L"MaskBinary")
-            .SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
-            .SetOutput(L"C", L"NegMask"))
-        .Operators().Add(LearningModelOperator(L"Add") // BackgroundMask = (1- foreground Mask)
-            .SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
-            .SetInput(L"B", L"NegMask")
-            .SetOutput(L"C", L"BackgroundMask"))
-        .Operators().Add(LearningModelOperator(L"Mul") // Extract the blurred background
-            .SetInput(L"A", L"BlurredImage")
-            .SetInput(L"B", L"BackgroundMask")
-            .SetOutput(L"C", L"BackgroundImage"))
+        // Extract the blurred background using the negation of the foreground mask
+        .Operators().Add(LearningModelOperator(L"AveragePool") // AveragePool to create blurred background
+            .SetInput(L"X", L"InputImage")
+            .SetAttribute(L"kernel_shape", TensorInt64Bit::CreateFromArray(std::vector<int64_t>{2}, std::array<int64_t, 2>{20, 20}))
+            .SetAttribute(L"auto_pad", TensorString::CreateFromArray(std::vector<int64_t>{1}, std::array<hstring, 1>{L"SAME_UPPER"}))
+            .SetOutput(L"Y", L"BlurredImage"))
+        .Operators().Add(LearningModelOperator(L"Mul")
+            .SetInput(L"A", L"MaskBinary")
+            .SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
+            .SetOutput(L"C", L"NegMask"))
+        .Operators().Add(LearningModelOperator(L"Add") // BackgroundMask = (1 - foreground mask)
+            .SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 1.f }))
+            .SetInput(L"B", L"NegMask")
+            .SetOutput(L"C", L"BackgroundMask"))
+        .Operators().Add(LearningModelOperator(L"Mul") // Extract the blurred background
+            .SetInput(L"A", L"BlurredImage")
+            .SetInput(L"B", L"BackgroundMask")
+            .SetOutput(L"C", L"BackgroundImage"))

-        // Combine foreground and background
-        .Operators().Add(LearningModelOperator(L"Add")
-            .SetInput(L"A", L"ForegroundImage")
-            .SetInput(L"B", L"BackgroundImage")
-            .SetOutput(L"C", L"OutputImage"));
+        // Combine foreground and background
+        .Operators().Add(LearningModelOperator(L"Add")
+            .SetInput(L"A", L"ForegroundImage")
+            .SetInput(L"B", L"BackgroundImage")
+            .SetOutput(L"C", L"OutputImage"));

-    return builder.CreateModel();
+    return builder.CreateModel();
 }
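For reference, a minimal CPU restatement of what the graph above computes per pixel (sketch only, not part of the patch; `blurred` stands in for the AveragePool output):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<float> ComposeForegroundOverBlur(
    const std::vector<float>& image,   // NCHW input image, flattened
    const std::vector<float>& blurred, // blurred copy, same layout
    const std::vector<float>& argmax,  // per-pixel class index as float (h*w)
    std::size_t channels, std::size_t pixelsPerChannel)
{
    std::vector<float> out(image.size());
    for (std::size_t ch = 0; ch < channels; ++ch) {
        for (std::size_t p = 0; p < pixelsPerChannel; ++p) {
            // Clip(argmax, 0, 1): class 0 (background) -> 0, any foreground class -> 1.
            const float mask = std::clamp(argmax[p], 0.0f, 1.0f);
            const std::size_t i = ch * pixelsPerChannel + p;
            out[i] = image[i] * mask + blurred[i] * (1.0f - mask);
        }
    }
    return out;
}
```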

 LearningModel Invert(long n, long c, long h, long w) {
-    auto builder = LearningModelBuilder::Create(opset)
-        // Loading in buffers and reshape
-        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, { n, c, h, w }))
-        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
-        .Operators().Add(LearningModelOperator(L"Mul")
-            .SetInput(L"A", L"Input")
-            .SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
-            .SetOutput(L"C", L"MulOutput")
-        )
-        .Operators().Add(LearningModelOperator(L"Add")
-            .SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 255.f }))
-            .SetInput(L"B", L"MulOutput")
-            .SetOutput(L"C", L"Output"));
+    auto builder = LearningModelBuilder::Create(opset)
+        // Loading in buffers and reshape
+        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Float, { n, c, h, w }))
+        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, { n, c, h, w }))
+        .Operators().Add(LearningModelOperator(L"Mul")
+            .SetInput(L"A", L"Input")
+            .SetConstant(L"B", TensorFloat::CreateFromIterable({ 1 }, { -1.f }))
+            .SetOutput(L"C", L"MulOutput")
+        )
+        .Operators().Add(LearningModelOperator(L"Add")
+            .SetConstant(L"A", TensorFloat::CreateFromIterable({ 1 }, { 255.f }))
+            .SetInput(L"B", L"MulOutput")
+            .SetOutput(L"C", L"Output"));

-    return builder.CreateModel();
+    return builder.CreateModel();
 }
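Note: Invert is the smallest example of the builder pattern in this file — it computes Output = 255 - Input by multiplying by -1 and adding 255.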

 LearningModel Normalize0_1ThenZScore(long h, long w, long c, const std::array<float, 3>& means, const std::array<float, 3>& stddev) {
-    assert(means.size() == c);
-    assert(stddev.size() == c);
+    assert(means.size() == c);
+    assert(stddev.size() == c);

-    auto builder = LearningModelBuilder::Create(opset)
-        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NCHW image", TensorKind::Float, {1, c, h, w}))
-        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", L"The NCHW image normalized with mean and stddev.", TensorKind::Float, {1, c, h, w}))
-        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImageForward", L"The NCHW image forwarded through the model.", TensorKind::Float, {1, c, h, w}))
-        .Operators().Add(LearningModelOperator(L"Div") // Normalize from 0-255 to 0-1 by dividing by 255
-            .SetInput(L"A", L"Input")
-            .SetConstant(L"B", TensorFloat::CreateFromArray({}, { 255.f }))
-            .SetOutput(L"C", L"DivOutput"))
-        .Operators().Add(LearningModelOperator(L"Reshape")
-            .SetConstant(L"data", TensorFloat::CreateFromArray({ c }, means))
-            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
-            .SetOutput(L"reshaped", L"MeansReshaped"))
-        .Operators().Add(LearningModelOperator(L"Reshape")
-            .SetConstant(L"data", TensorFloat::CreateFromArray({ c }, stddev))
-            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
-            .SetOutput(L"reshaped", L"StdDevReshaped"))
-        .Operators().Add(LearningModelOperator(L"Sub") // Shift by the means
-            .SetInput(L"A", L"DivOutput")
-            .SetInput(L"B", L"MeansReshaped")
-            .SetOutput(L"C", L"SubOutput"))
-        .Operators().Add(LearningModelOperator(L"Div") // Divide by stddev
-            .SetInput(L"A", L"SubOutput")
-            .SetInput(L"B", L"StdDevReshaped")
-            .SetOutput(L"C", L"Output"))
-        .Operators().Add(LearningModelOperator(L"Identity")
-            .SetInput(L"input", L"Input")
-            .SetOutput(L"output", L"OutputImageForward"));
-    return builder.CreateModel();
+    auto builder = LearningModelBuilder::Create(opset)
+        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NCHW image", TensorKind::Float, {1, c, h, w}))
+        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", L"The NCHW image normalized with mean and stddev.", TensorKind::Float, {1, c, h, w}))
+        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"OutputImageForward", L"The NCHW image forwarded through the model.", TensorKind::Float, {1, c, h, w}))
+        .Operators().Add(LearningModelOperator(L"Div") // Normalize from 0-255 to 0-1 by dividing by 255
+            .SetInput(L"A", L"Input")
+            .SetConstant(L"B", TensorFloat::CreateFromArray({}, { 255.f }))
+            .SetOutput(L"C", L"DivOutput"))
+        .Operators().Add(LearningModelOperator(L"Reshape")
+            .SetConstant(L"data", TensorFloat::CreateFromArray({ c }, means))
+            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
+            .SetOutput(L"reshaped", L"MeansReshaped"))
+        .Operators().Add(LearningModelOperator(L"Reshape")
+            .SetConstant(L"data", TensorFloat::CreateFromArray({ c }, stddev))
+            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({ 4 }, { 1, c, 1, 1 }))
+            .SetOutput(L"reshaped", L"StdDevReshaped"))
+        .Operators().Add(LearningModelOperator(L"Sub") // Shift by the means
+            .SetInput(L"A", L"DivOutput")
+            .SetInput(L"B", L"MeansReshaped")
+            .SetOutput(L"C", L"SubOutput"))
+        .Operators().Add(LearningModelOperator(L"Div") // Divide by stddev
+            .SetInput(L"A", L"SubOutput")
+            .SetInput(L"B", L"StdDevReshaped")
+            .SetOutput(L"C", L"Output"))
+        .Operators().Add(LearningModelOperator(L"Identity")
+            .SetInput(L"input", L"Input")
+            .SetOutput(L"output", L"OutputImageForward"));
+    return builder.CreateModel();
 }
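Note: per channel ch this graph computes Output = (Input / 255 - means[ch]) / stddev[ch]. The two Reshape operators expand the three-element mean/stddev constants to shape { 1, c, 1, 1 } so they broadcast across the NCHW image, and the Identity tap exposes the unnormalized input as OutputImageForward for the later join.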

 LearningModel ReshapeFlatBufferToNCHW(long n, long c, long h, long w) {
-    auto builder = LearningModelBuilder::Create(opset)
-        // Loading in buffers and reshape
-        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::UInt8, { 1, n * c * h * w }))
-        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, {n, c, h, w}))
-        .Operators().Add(LearningModelOperator((L"Cast"))
-            .SetInput(L"input", L"Input")
-            .SetOutput(L"output", L"CastOutput")
-            .SetAttribute(L"to",
-                TensorInt64Bit::CreateFromIterable({}, {OnnxDataType::ONNX_FLOAT})))
-        .Operators().Add(LearningModelOperator(L"Reshape")
-            .SetInput(L"data", L"CastOutput")
-            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({4}, {n, h, w, c}))
-            .SetOutput(L"reshaped", L"ReshapeOutput"))
-        .Operators().Add(LearningModelOperator(L"Transpose")
-            .SetInput(L"data", L"ReshapeOutput")
-            .SetAttribute(L"perm", TensorInt64Bit::CreateFromArray({ 4 }, { 0, 3, 1, 2 }))
-            .SetOutput(L"transposed", L"Output"));
-    return builder.CreateModel();
+    auto builder = LearningModelBuilder::Create(opset)
+        // Loading in buffers and reshape
+        .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::UInt8, { 1, n * c * h * w }))
+        .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", TensorKind::Float, {n, c, h, w}))
+        .Operators().Add(LearningModelOperator((L"Cast"))
+            .SetInput(L"input", L"Input")
+            .SetOutput(L"output", L"CastOutput")
+            .SetAttribute(L"to",
+                TensorInt64Bit::CreateFromIterable({}, {OnnxDataType::ONNX_FLOAT})))
+        .Operators().Add(LearningModelOperator(L"Reshape")
+            .SetInput(L"data", L"CastOutput")
+            .SetConstant(L"shape", TensorInt64Bit::CreateFromIterable({4}, {n, h, w, c}))
+            .SetOutput(L"reshaped", L"ReshapeOutput"))
+        .Operators().Add(LearningModelOperator(L"Transpose")
+            .SetInput(L"data", L"ReshapeOutput")
+            .SetAttribute(L"perm", TensorInt64Bit::CreateFromArray({ 4 }, { 0, 3, 1, 2 }))
+            .SetOutput(L"transposed", L"Output"));
+    return builder.CreateModel();
 }
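Note: ReshapeFlatBufferToNCHW turns a flat UInt8 buffer into a float NCHW tensor — Cast to float, Reshape to interleaved NHWC { n, h, w, c }, then Transpose with perm { 0, 3, 1, 2 } to planar NCHW. A CPU sketch of that index mapping (illustrative only):

```cpp
#include <cstdint>

// dst[i][ch][y][x] = src[i][y][x][ch]: interleaved NHWC bytes -> planar float NCHW.
void InterleavedToPlanar(const uint8_t* src, float* dst, long n, long c, long h, long w) {
    for (long i = 0; i < n; ++i)
        for (long y = 0; y < h; ++y)
            for (long x = 0; x < w; ++x)
                for (long ch = 0; ch < c; ++ch)
                    dst[((i * c + ch) * h + y) * w + x] =
                        static_cast<float>(src[((i * h + y) * w + x) * c + ch]);
}
```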
diff --git a/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.h b/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.h
index d83978dc..d74fa7d0 100644
--- a/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.h
+++ b/Samples/WinMLSamplesGallery/WinMLSamplesGalleryNative/SegmentModel.h
@@ -31,107 +31,109 @@ LearningModel Invert(long n, long c, long h, long w);

 class StreamModelBase {
 public:
-    StreamModelBase() :
-        m_inputVideoFrame(nullptr),
-        m_outputVideoFrame(nullptr),
-        m_session(nullptr),
-        m_binding(nullptr),
-        m_syncStarted(false) {}
+    StreamModelBase() :
+        m_inputVideoFrame(nullptr),
+        m_outputVideoFrame(nullptr),
+        m_session(nullptr),
+        m_binding(nullptr),
+        m_syncStarted(false) {}

-    virtual ~StreamModelBase() {
-        if(m_session) m_session.Close();
-        if(m_binding) m_binding.Clear();
-        if (m_inputVideoFrame) m_inputVideoFrame.Close();
-        if (m_outputVideoFrame) m_outputVideoFrame.Close();
-    };
+    virtual ~StreamModelBase() {
+        if(m_session) m_session.Close();
+        if(m_binding) m_binding.Clear();
+        if (m_inputVideoFrame) m_inputVideoFrame.Close();
+        if (m_outputVideoFrame) m_outputVideoFrame.Close();
+    };

-    virtual void InitializeSession(int w, int h) = 0;
-    virtual void Run(IDirect3DSurface src, IDirect3DSurface dest) = 0;
-
-    // Synchronous eval status
-    bool m_syncStarted = false;
-    VideoFrame m_outputVideoFrame;
-    static const int m_scale = 4;
-    winrt::hstring m_modelBasePath;
+    virtual void InitializeSession(int w, int h) = 0;
+    virtual void Run(IDirect3DSurface src, IDirect3DSurface dest) = 0;
+
+    // Synchronous eval status
+    bool m_syncStarted = false;
+    VideoFrame m_outputVideoFrame;
+    static const int m_scale = 4;
+    winrt::hstring m_modelBasePath;

 protected:
-    // Cache input frames into a shareable d3d-backed VideoFrame
-    void SetVideoFrames(VideoFrame inVideoFrame, VideoFrame outVideoFrame)
-    {
-        if (true || !m_videoFramesSet)
-        {
-            auto device = m_session.Device().Direct3D11Device();
-            auto inDesc = inVideoFrame.Direct3DSurface().Description();
-            auto outDesc = outVideoFrame.Direct3DSurface().Description();
-            /*
-            NOTE: VideoFrame::CreateAsDirect3D11SurfaceBacked takes arguments in (width, height) order
-            whereas every model created with LearningModelBuilder takes arguments in (height, width) order.
-            */
-            auto format = winrt::Windows::Graphics::DirectX::DirectXPixelFormat::B8G8R8X8UIntNormalized;
-            m_inputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
-            m_outputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
-            m_videoFramesSet = true;
-        }
-        // NOTE: WinML supports mainly RGB-formatted video frames, which aren't backed by a shareable surface by the Capture Engine.
-        // Copying to a new VideoFrame makes it shareable for use in inference.
-        inVideoFrame.CopyToAsync(m_inputVideoFrame).get();
-        outVideoFrame.CopyToAsync(m_outputVideoFrame).get();
-    }
+    // Cache input frames into a shareable d3d-backed VideoFrame
+    void SetVideoFrames(VideoFrame inVideoFrame, VideoFrame outVideoFrame)
+    {
+        if (true || !m_videoFramesSet)
+        {
+            auto device = m_session.Device().Direct3D11Device();
+            auto inDesc = inVideoFrame.Direct3DSurface().Description();
+            auto outDesc = outVideoFrame.Direct3DSurface().Description();
+            /*
+            NOTE: VideoFrame::CreateAsDirect3D11SurfaceBacked takes arguments in (width, height) order
+            whereas every model created with LearningModelBuilder takes arguments in (height, width) order.
+            */
+            auto format = winrt::Windows::Graphics::DirectX::DirectXPixelFormat::B8G8R8X8UIntNormalized;
+            m_inputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
+            m_outputVideoFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(format, m_imageWidthInPixels, m_imageHeightInPixels, device);
+            m_videoFramesSet = true;
+        }
+        // NOTE: WinML supports mainly RGB-formatted video frames, which aren't backed by a shareable surface by the Capture Engine.
+        // Copying to a new VideoFrame makes it shareable for use in inference.
+        inVideoFrame.CopyToAsync(m_inputVideoFrame).get();
+        outVideoFrame.CopyToAsync(m_outputVideoFrame).get();
+    }
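Note: `if (true || !m_videoFramesSet)` is always true, so the D3D-backed frames are recreated on every call and m_videoFramesSet never actually gates the allocation; the copy below is still needed either way to make capture frames shareable.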

-    void SetImageSize(int w, int h) {
-        m_imageWidthInPixels = w;
-        m_imageHeightInPixels = h;
-    }
-
-    LearningModelSession CreateLearningModelSession(const LearningModel& model, bool closedModel = true) {
-        auto device = LearningModelDevice(m_useGPU ? LearningModelDeviceKind::DirectXHighPerformance : LearningModelDeviceKind::Default);
-        auto options = LearningModelSessionOptions();
-        options.BatchSizeOverride(0);
-        options.CloseModelOnSessionCreation(closedModel);
-        auto session = LearningModelSession(model, device, options);
-        return session;
-    }
+    void SetImageSize(int w, int h)
+    {
+        m_imageWidthInPixels = w;
+        m_imageHeightInPixels = h;
+    }
+
-    bool m_useGPU = true;
-    bool m_videoFramesSet = false;
-    VideoFrame m_inputVideoFrame;
-
-    UINT32 m_imageWidthInPixels = 0;
-    UINT32 m_imageHeightInPixels = 0;
+    LearningModelSession CreateLearningModelSession(const LearningModel& model, bool closedModel = true)
+    {
+        auto device = LearningModelDevice(m_useGPU ? LearningModelDeviceKind::DirectXHighPerformance : LearningModelDeviceKind::Default);
+        auto options = LearningModelSessionOptions();
+        options.BatchSizeOverride(0);
+        options.CloseModelOnSessionCreation(closedModel);
+        auto session = LearningModelSession(model, device, options);
+        return session;
+    }

-    // Learning Model Binding and Session.
-    LearningModelSession m_session;
-    LearningModelBinding m_binding;
+    bool m_useGPU = true;
+    bool m_videoFramesSet = false;
+    VideoFrame m_inputVideoFrame;
+    UINT32 m_imageWidthInPixels = 0;
+    UINT32 m_imageHeightInPixels = 0;
+
+    // Learning Model Binding and Session.
+    LearningModelSession m_session;
+    LearningModelBinding m_binding;
 };

 class StyleTransfer : public StreamModelBase {
 public:
-    StyleTransfer() : StreamModelBase() {};
-    void InitializeSession(int w, int h);
-    void Run(IDirect3DSurface src, IDirect3DSurface dest);
+    StyleTransfer() : StreamModelBase() {};
+    void InitializeSession(int w, int h);
+    void Run(IDirect3DSurface src, IDirect3DSurface dest);

 private:
-    LearningModel GetModel();
+    LearningModel GetModel();
 };

 class BackgroundBlur : public StreamModelBase {
 public:
-    BackgroundBlur() :
-        StreamModelBase()
-    {};
-    void InitializeSession(int w, int h);
-    void Run(IDirect3DSurface src, IDirect3DSurface dest);
+    BackgroundBlur() :
+        StreamModelBase()
+    {};
+    void InitializeSession(int w, int h);
+    void Run(IDirect3DSurface src, IDirect3DSurface dest);

 private:
-    LearningModel GetModel();
-    LearningModel PostProcess(long n, long c, long h, long w, long axis);
-
-    // Mean and standard deviation for z-score normalization during preprocessing.
-    std::array<float, 3> m_mean = { 0.485f, 0.456f, 0.406f };
-    std::array<float, 3> m_stddev = { 0.229f, 0.224f, 0.225f };
+    LearningModel GetModel();
+    LearningModel PostProcess(long n, long c, long h, long w, long axis);
+
+    // Mean and standard deviation for z-score normalization during preprocessing.
+    std::array<float, 3> m_mean = { 0.485f, 0.456f, 0.406f };
+    std::array<float, 3> m_stddev = { 0.229f, 0.224f, 0.225f };
 };
\ No newline at end of file
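For orientation, a hypothetical caller of these classes (the model directory and surfaces are assumptions; only the public surface declared above is used):

```cpp
// Sketch only: drive BackgroundBlur over one captured frame.
BackgroundBlur blur;
blur.m_modelBasePath = L"C:\\models\\"; // hypothetical folder containing fcn-resnet50-12.onnx
blur.InitializeSession(1280, 720);      // builds the fused normalize -> FCN -> post-process session
blur.Run(srcSurface, destSurface);      // srcSurface/destSurface: IDirect3DSurface from the capture pipeline
```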