Enable cpp20 builds for DML EP and WinML API (#17800)

Enable cpp20 builds for DML EP and WinML API: 1) Add the missing `typename` for templated types. 2) Add an `unmove_ptr` helper for inline references to rvalue temporaries. This is okay because, per the standard, a temporary bound to a reference parameter in a function call exists until the end of the full expression containing that function call; only if the function returns a reference that outlives the full expression does it become a dangling reference. 3) `static` is now not needed for template specializations. --------- Co-authored-by: Sheil Kumar <sheilk@microsoft.com>
2023-10-06 10:33:38 -07:00 · 2023-10-06 10:33:38 -07:00 · cb9408e89c
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
@ -13,7 +13,7 @@ namespace Dml
        ComPtr<ID3D12Resource> resource;
        auto buffer = CD3DX12_RESOURCE_DESC::Buffer(size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
        ORT_THROW_IF_FAILED(m_device->CreateCommittedResource(
-            &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+            unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)),
            D3D12_HEAP_FLAG_NONE,
            &buffer,
            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
@ -6,6 +6,11 @@
 #include <assert.h>
 #include "core/providers/dml/OperatorAuthorHelper/Common.h"

+template <typename T>
+auto unmove_ptr(T&& t) {
+    return &static_cast<T&>(t);
+}
+
 namespace Dml
 {
    using namespace OperatorHelper;
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
@ -636,7 +636,7 @@ namespace Dml

    bool IsCpuOnDmlOperator(const onnxruntime::Node& node)
    {
-        auto cpuOnDmlOperators = std::array<char*, 8>{
+        auto cpuOnDmlOperators = std::array<const char*, 8>{
            "SequenceAt",
            "SequenceConstruct",
            "SequenceEmpty",
@ -659,7 +659,7 @@ namespace Dml

    bool IsDmlSequenceOperator(const onnxruntime::Node& node)
    {
-        auto sequence_ops = std::array<char*, 1>{
+        auto sequence_ops = std::array<const char*, 1>{
            "ConcatFromSequence"
        };

@ -675,7 +675,7 @@ namespace Dml

    bool IsCustomOpShader(const onnxruntime::Node& node)
    {
-        auto custom_ops = std::array<char*, 3>{
+        auto custom_ops = std::array<const char*, 3>{
            "DFT",
            "STFT",
            "GridSample"
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
@ -143,7 +143,8 @@ public:
        );

        DML_EXECUTION_FLAGS executionFlags = GetExecutionFlags();
-        m_compiledOperator.Attach(graph.Compile(executionFlags, { batchNormalization }).Detach());
+        std::array<dml::Expression, 1> outputs = { batchNormalization };
+        m_compiledOperator.Attach(graph.Compile(executionFlags, outputs).Detach());
    }

    void Compute(const MLOperatorKernelContext& kernelContext) override
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
@ -84,7 +84,7 @@ public:
            poolingDesc.EndPadding = m_kernel.endPadding;

            DML_OPERATOR_DESC opDesc = {};
-            opDesc.Type = ApiTraits::OperatorDescTraits<std::remove_reference<decltype(poolingDesc)>::type>::Type;
+            opDesc.Type = ApiTraits::OperatorDescTraits<typename std::remove_reference<decltype(poolingDesc)>::type>::Type;
            opDesc.Desc = &poolingDesc;
            SetDmlOperatorDesc(opDesc, kernelInfo);
        };
--- a/winml/lib/Api.Image/CpuDetensorizer.h
+++ b/winml/lib/Api.Image/CpuDetensorizer.h
@ -128,8 +128,12 @@ class CpuDetensorizer {
    return nominalRangeConverter.Denormalize(*pCPUTensor);
  }

+  // clang-format off
  template <>
-  static float ReadTensor<DirectX::PackedVector::HALF>(
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  float ReadTensor<DirectX::PackedVector::HALF>(
    const DirectX::PackedVector::HALF* pCPUTensor, const NominalRangeConverter& nominalRangeConverter
  ) {
    return nominalRangeConverter.Denormalize(DirectX::PackedVector::XMConvertHalfToFloat(*pCPUTensor));
@ -167,9 +171,13 @@ class CpuDetensorizer {
    }
  }

+  // clang-format off
 #if defined(_M_AMD64) || defined(_M_IX86)
  template <>
-  static void InterleaveRowFloatToByte(
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  void InterleaveRowFloatToByte(
    const float* xChannel,
    const float* yChannel,
    const float* zChannel,
--- a/winml/lib/Api.Image/CpuTensorizer.h
+++ b/winml/lib/Api.Image/CpuTensorizer.h
@ -112,13 +112,23 @@ class CpuTensorizer {
  template <typename T>
  static T ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter);

+  // clang-format off
  template <>
-  static float ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter) {
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  float ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter) {
    return nominalRangeConverter.Normalize(static_cast<float>(input));
  }
+
+  // clang-format off
  template <>
-  static DirectX::PackedVector::HALF ConvertByteToFloat(
-    const BYTE& input, const NominalRangeConverter& nominalRangeConverter
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  DirectX::PackedVector::HALF ConvertByteToFloat(
+    const BYTE& input,
+    const NominalRangeConverter& nominalRangeConverter
  ) {
    return nominalRangeConverter.Normalize(DirectX::PackedVector::XMConvertFloatToHalf(input));
  }
@ -159,9 +169,13 @@ class CpuTensorizer {
    }
  }

+  // clang-format off
 #if defined(_M_AMD64) || defined(_M_IX86)
  template <>
-  static void DeinterleaveRowByteToFloat(
+#if _MSVC_LANG < 202002L
+  static
+#endif
+  void DeinterleaveRowByteToFloat(
    _In_ BYTE* pBuffer,
    _Inout_ float* xChannel,
    _Inout_ float* yChannel,
--- a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
+++ b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
@ -170,8 +170,8 @@ void TensorToVideoFrameConverter::DX12TensorToVideoFrame(
          D3D12_RESOURCE_DESC cachedTextureDesc = output_resource_->GetDesc();

          if (cachedTextureDesc.Width != videoFrameTextureDesc.Width ||
-                        cachedTextureDesc.Height != videoFrameTextureDesc.Height ||
-                        cachedTextureDesc.Format != videoFrameTextureDesc.Format) {
+              cachedTextureDesc.Height != videoFrameTextureDesc.Height ||
+              cachedTextureDesc.Format != videoFrameTextureDesc.Format) {
            // The dimensions or format don't match, so we need to re-create our texture
            output_resource_ = CreateShareableD3D12Texture(videoFrameTextureDesc, pDeviceCache->GetD3D12Device());
            D3D11_cached_texture_ = ShareD3D12Texture(output_resource_.Get(), pDeviceCache->GetD3D11Device());
@ -197,11 +197,10 @@ void TensorToVideoFrameConverter::DX12TensorToVideoFrame(
        UINT handleSize = static_cast<UINT>(sizeof(sharedHandle));

        if ((FAILED(spVideoFrameTexture->GetPrivateData(
-                         _d3d11TextureGUID, &comPtrSize, spSharedD3D11Texture.GetAddressOf()
-                     )) ||
-                     !spSharedD3D11Texture.Get()) ||
-                    (FAILED(spVideoFrameTexture->GetPrivateData(_handleGUID, &handleSize, &sharedHandle)) ||
-                     sharedHandle != shared_handle_)) {
+                 _d3d11TextureGUID, &comPtrSize, spSharedD3D11Texture.GetAddressOf())) ||
+             !spSharedD3D11Texture.Get()) ||
+            (FAILED(spVideoFrameTexture->GetPrivateData(_handleGUID, &handleSize, &sharedHandle)) ||
+             sharedHandle != shared_handle_)) {
          // Create a new shared texture that we cache on the video frame texture
          output_resource_ = CreateShareableD3D12Texture(videoFrameTextureDesc, pDeviceCache->GetD3D12Device());
          spSharedD3D11Texture = ShareD3D12Texture(output_resource_.Get(), spTextureDevice.Get());
@ -499,9 +498,9 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToDX12Texture(
  outputResourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;

  if (!UAV_resource_ || outputDesc.Format != UAV_resource_->GetDesc().Format ||
-        outputDesc.Width != UAV_resource_->GetDesc().Width || outputDesc.Height != UAV_resource_->GetDesc().Height) {
+      outputDesc.Width != UAV_resource_->GetDesc().Width || outputDesc.Height != UAV_resource_->GetDesc().Height) {
    WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)),
      D3D12_HEAP_FLAG_NONE,
      &outputResourceDesc,
      D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
@ -599,31 +598,31 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToDX12Texture(

    command_list_->ResourceBarrier(
      1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
        pInputResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE
-      )
+      ))
    );
    command_list_->Dispatch(dispatchWidth, dispatchHeight, 1);
    command_list_->ResourceBarrier(
      1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
        pInputResource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS
-      )
+      ))
    );

    // Copy the UAV data to the output resource after detensorization
    command_list_->ResourceBarrier(
      1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
        UAV_resource_.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE
-      )
+      ))
    );
    command_list_->CopyResource(pOutputResource, UAV_resource_.Get());
    command_list_->ResourceBarrier(
      1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
        UAV_resource_.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS
-      )
+      ))
    );

    WINML_THROW_IF_FAILED(command_list_->Close());
@ -657,9 +656,9 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToSoftwareBitmap(
  // TODO: Make an allocator for readback heaps
  if (!readback_heap_ || readback_heap_->GetDesc().Width < singleVideoFramebufferSize) {
    WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK)),
      D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(singleVideoFramebufferSize),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(singleVideoFramebufferSize)),
      D3D12_RESOURCE_STATE_COPY_DEST,
      nullptr,
      IID_PPV_ARGS(&readback_heap_)
@ -689,12 +688,14 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToSoftwareBitmap(
  device_cache.SyncD3D12ToCPU();

  void* pCPUTensorBuffer = nullptr;
-  WINML_THROW_IF_FAILED(readback_heap_->Map(0, &CD3DX12_RANGE(0, singleVideoFramebufferSize), &pCPUTensorBuffer));
+  WINML_THROW_IF_FAILED(
+    readback_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, singleVideoFramebufferSize)), &pCPUTensorBuffer)
+  );

  // We avoid the Video Frame pipeline by manually downloading the GPU data to the CPU and detensorize while we are filling the readback heap
  ConvertCPUTensorToSoftwareBitmap(pCPUTensorBuffer, tensorDesc, softwareBitmap);

-  readback_heap_->Unmap(0, &CD3DX12_RANGE(0, 0));
+  readback_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, 0)));
 }

 void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
@ -708,9 +709,9 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
  // TODO: Make an allocator for readback heaps
  if (!readback_heap_ || readback_heap_->GetDesc().Width < buffer_size_in_bytes) {
    WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK)),
      D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes)),
      D3D12_RESOURCE_STATE_COPY_DEST,
      nullptr,
      IID_PPV_ARGS(&readback_heap_)
@ -733,9 +734,9 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
  device_cache.SyncD3D12ToCPU();

  byte* readback_buffer = nullptr;
-  WINML_THROW_IF_FAILED(
-    readback_heap_->Map(0, &CD3DX12_RANGE(0, buffer_size_in_bytes), reinterpret_cast<void**>(&readback_buffer))
-  );
+  WINML_THROW_IF_FAILED(readback_heap_->Map(
+    0, unmove_ptr(CD3DX12_RANGE(0, buffer_size_in_bytes)), reinterpret_cast<void**>(&readback_buffer)
+  ));
  auto readback_buffer_span = gsl::span<byte>(readback_buffer, buffer_size_in_bytes);
  _winml::StoreSpanIntoDisjointBuffers(
    buffers.size(),
@ -748,7 +749,7 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
    readback_buffer_span
  );

-  readback_heap_->Unmap(0, &CD3DX12_RANGE(0, 0));
+  readback_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, 0)));
 }

 D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor(
--- a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
+++ b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
@ -600,9 +600,9 @@ void VideoFrameToTensorConverter::ConvertSoftwareBitmapToGPUTensor(
  // TODO: Make an allocator for upload heaps
  if (!upload_heap_ || upload_heap_->GetDesc().Width < bufferSize) {
    WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD)),
      D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(bufferSize),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(bufferSize)),
      D3D12_RESOURCE_STATE_GENERIC_READ,
      nullptr,
      IID_PPV_ARGS(&upload_heap_)
@ -610,14 +610,14 @@ void VideoFrameToTensorConverter::ConvertSoftwareBitmapToGPUTensor(
  }

  void* pCPUTensorBuffer = nullptr;
-  WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), &pCPUTensorBuffer));
+  WINML_THROW_IF_FAILED(upload_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, 0)), unmove_ptr(pCPUTensorBuffer)));

  // We avoid the Video Frame pipeline by manually sending the CPU data to the GPU, and we tensorize while we are filling the
  // upload heap. The image may already have been cropped/scaled by the video frame pipeline, so we send the scaled bounds
  // instead of the initial input bounds
  ConvertSoftwareBitmapToCPUTensor(convertedSoftwareBitmap, tensorDesc, scaledBounds, pCPUTensorBuffer);

-  upload_heap_->Unmap(0, &CD3DX12_RANGE(0, bufferSize));
+  upload_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, bufferSize)));

  ResetCommandList(device_cache);

@ -642,9 +642,9 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
  // Copy the cpu memory into the gpu resource
  if (!upload_heap_ || upload_heap_->GetDesc().Width < buffer_size_in_bytes) {
    WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD)),
      D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes)),
      D3D12_RESOURCE_STATE_GENERIC_READ,
      nullptr,
      IID_PPV_ARGS(&upload_heap_)
@ -652,7 +652,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
  }

  byte* gpu_buffer = nullptr;
-  WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), reinterpret_cast<void**>(&gpu_buffer)));
+  WINML_THROW_IF_FAILED(upload_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, 0)), reinterpret_cast<void**>(&gpu_buffer)));
  auto gpu_buffer_span = gsl::span<byte>(gpu_buffer, buffer_size_in_bytes);

  _winml::LoadSpanFromDisjointBuffers(
@ -666,7 +666,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
    gpu_buffer_span
  );

-  upload_heap_->Unmap(0, &CD3DX12_RANGE(0, buffer_size_in_bytes));
+  upload_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, buffer_size_in_bytes)));

  ResetCommandList(device_cache);

--- a/winml/lib/Api.Image/inc/D3DDeviceCache.h
+++ b/winml/lib/Api.Image/inc/D3DDeviceCache.h
@ -35,6 +35,11 @@ enum class PipelineStateCacheOperation : unsigned char {
  kCount = 2
 };

+template <typename E>
+constexpr auto underlying(E e) noexcept {
+  return static_cast<typename std::underlying_type<E>::type>(e);
+}
+
 class D3DDeviceCache {
 public:
  ~D3DDeviceCache();
@ -104,9 +109,10 @@ class D3DDeviceCache {
  winrt::com_ptr<ID3D12RootSignature> tensorize_root_signature_;
  winrt::com_ptr<ID3D12RootSignature> detensorize_root_signature_;

+  // clang-format off
  winrt::com_ptr<ID3D12PipelineState>
-    cached_pipeline_state[PipelineStateCacheType::kCount][PipelineStateCacheFormat::kCount]
-                         [PipelineStateCacheFormat::kCount][PipelineStateCacheOperation::kCount];
+    cached_pipeline_state[underlying(PipelineStateCacheType::kCount)][underlying(PipelineStateCacheFormat::kCount)]
+                         [underlying(PipelineStateCacheFormat::kCount)][underlying(PipelineStateCacheOperation::kCount)];

  winrt::com_ptr<ID3D12Resource> detensorize_vertex_buffer_;

--- a/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
+++ b/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
@ -80,7 +80,7 @@ class TensorToVideoFrameConverter : public ImageConverter {
    _Inout_ wm::VideoFrame& unsupported_video_frame
  );

-  static D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor(
+  static D3D12_SHADER_RESOURCE_VIEW_DESC CreateSRVDescriptor(
    const UINT32 batch_index, const D3D12_RESOURCE_DESC& resource_description, const ImageTensorDescription& description
  );

--- a/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
+++ b/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
@ -86,7 +86,7 @@ class VideoFrameToTensorConverter : public ImageConverter {
    const UINT32 batch_index, const D3D12_RESOURCE_DESC& resource_description, const ImageTensorDescription& description
  );

-  static void VideoFrameToTensorConverter::ConvertSoftwareBitmapToCPUTensor(
+  static void ConvertSoftwareBitmapToCPUTensor(
    _In_ const wgi::SoftwareBitmap& software_bitmap,
    _In_ const ImageTensorDescription& tensor_description,
    _In_ const wgi::BitmapBounds& input_bounds,
--- a/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
+++ b/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
@ -862,12 +862,12 @@ struct FillMapTensors {
  static HRESULT Run(
    const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
  ) {
-    AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
+    typename AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
    RETURN_HR_IF_NOT_OK_MSG(
      ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast<void**>(&keys_mutable_data)), ort_api
    );

-    AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
+    typename AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
    RETURN_HR_IF_NOT_OK_MSG(
      ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast<void**>(&values_mutable_data)), ort_api
    );
@ -888,7 +888,7 @@ struct FillMapTensors<HSTRING, TAbiValue> {
  static HRESULT Run(
    const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
  ) {
-    AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
+    typename AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
    RETURN_HR_IF_NOT_OK_MSG(
      ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast<void**>(&values_mutable_data)), ort_api
    );
@ -916,7 +916,7 @@ struct FillMapTensors<TAbiKey, HSTRING> {
  static HRESULT Run(
    const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
  ) {
-    AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
+    typename AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
    RETURN_HR_IF_NOT_OK_MSG(
      ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast<void**>(&keys_mutable_data)), ort_api
    );
--- a/winml/lib/Api/ImageFeatureValue.h
+++ b/winml/lib/Api/ImageFeatureValue.h
@ -23,7 +23,7 @@ struct ImageFeatureValue : ImageFeatureValueT<ImageFeatureValue, _winml::ILotusV
  wfc::IIterable<Windows::Media::VideoFrame> VideoFrames();
  winml::LearningModelFeatureKind Kind();

-  static winml::ImageFeatureValue ImageFeatureValue::Create(
+  static winml::ImageFeatureValue Create(
    uint32_t batchSize, Windows::Graphics::Imaging::BitmapPixelFormat format, uint32_t width, uint32_t height
  );
  static winml::ImageFeatureValue CreateFromVideoFrame(Windows::Media::VideoFrame const& image);
--- a/winml/lib/Api/impl/MapBase.h
+++ b/winml/lib/Api/impl/MapBase.h
@ -7,6 +7,8 @@

 #include "MapFeatureDescriptor.h"
 #include "TensorFeatureDescriptor.h"
+#include "LearningModelSession.h"
+#include "IMapFeatureValue.h"

 namespace _winml {

--- a/winml/lib/Api/impl/SequenceBase.h
+++ b/winml/lib/Api/impl/SequenceBase.h
@ -6,6 +6,10 @@
 #include "MapFeatureDescriptor.h"
 #include "SequenceFeatureDescriptor.h"
 #include "TensorFeatureDescriptor.h"
+#include "LearningModelSession.h"
+#include "ISequenceFeatureValue.h"
+
+#include "FeatureValues.h"

 namespace _winml {

@ -169,55 +173,55 @@ struct SequenceBase : public winrt::implements<
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorBoolean>() {
-    return winmlp::TensorBoolean::Create();
+    return winml::TensorBoolean::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorFloat>() {
-    return winmlp::TensorFloat::Create();
+    return winml::TensorFloat::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorDouble>() {
-    return winmlp::TensorDouble::Create();
+    return winml::TensorDouble::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorInt8Bit>() {
-    return winmlp::TensorInt8Bit::Create();
+    return winml::TensorInt8Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorUInt8Bit>() {
-    return winmlp::TensorUInt8Bit::Create();
+    return winml::TensorUInt8Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorUInt16Bit>() {
-    return winmlp::TensorUInt16Bit::Create();
+    return winml::TensorUInt16Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorInt16Bit>() {
-    return winmlp::TensorInt16Bit::Create();
+    return winml::TensorInt16Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorUInt32Bit>() {
-    return winmlp::TensorUInt32Bit::Create();
+    return winml::TensorUInt32Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorInt32Bit>() {
-    return winmlp::TensorInt32Bit::Create();
+    return winml::TensorInt32Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorUInt64Bit>() {
-    return winmlp::TensorUInt64Bit::Create();
+    return winml::TensorUInt64Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorInt64Bit>() {
-    return winmlp::TensorInt64Bit::Create();
+    return winml::TensorInt64Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorFloat16Bit>() {
-    return winmlp::TensorFloat16Bit::Create();
+    return winml::TensorFloat16Bit::Create();
  }
  template <>
  auto CreatePlaceholderTensor<winml::TensorString>() {
-    return winmlp::TensorString::Create();
+    return winml::TensorString::Create();
  }

  void AppendValue(_winml::BindingContext& context, wfc::IVector<T> data, winrt::com_ptr<_winml::IValue> value) {
--- a/winml/lib/Api/impl/TensorBase.h
+++ b/winml/lib/Api/impl/TensorBase.h
@ -875,7 +875,7 @@ struct TensorBase : TBase {

    WINML_THROW_HR_IF_TRUE_MSG(
      E_ILLEGAL_METHOD_CALL,
-      std::is_same<T, std::string>::value,
+      (std::is_same<T, std::string>::value),
      "TensorString objects cannot be created from IBuffers!"
    );
  }
--- a/winml/lib/Api/impl/TensorKindFrom.h
+++ b/winml/lib/Api/impl/TensorKindFrom.h
@ -3,6 +3,8 @@

 #pragma once

+#include "TensorFeatureDescriptor.h"
+
 namespace _winml {

 // We need to define our own type for Half since DirectX::PackedVector::Half resolves to uint16_t per its typedef declaration.
--- a/winml/lib/Api/pch/pch.h
+++ b/winml/lib/Api/pch/pch.h
@ -12,5 +12,6 @@

 #include "cppwinrt_onnx.h"
 #include "dx.h"
+#include "FeatureValues.h"

 #pragma warning(pop)
--- a/winml/lib/Common/inc/common.h
+++ b/winml/lib/Common/inc/common.h
@ -51,3 +51,8 @@ TRACELOGGING_DECLARE_PROVIDER(winml_trace_logging_provider);
 #include "NamespaceAliases.h"
 #include "StringHelpers.h"
 #include "WinML_Lock.h"
+
+template <typename T>
+auto unmove_ptr(T&& t) {
+  return &static_cast<T&>(t);
+}