Renaming MKL-DNN as DNNL (#2515)
* DNNL: Moving Files to rename file names
* DNNL name change
* azure pipeline updated
* disable ceil/dialation and enable Opset10
* disable ceil/dialation tests in Python
* mlperf_ssd_resnet34_1200 disabled
This commit is contained in:
Parent: 3d627362a0
Commit: 31ea11a696
@@ -96,7 +96,7 @@ The complete list of build options can be found by running `./build.sh (or .\bui
**Execution Providers**
* [NVIDIA CUDA](#CUDA)
* [NVIDIA TensorRT](#TensorRT)
-* [Intel MKL-DNN/MKL-ML](#MKLDNN-and-MKLML)
+* [Intel DNNL/MKL-ML](#DNNL-and-MKLML)
* [Intel nGraph](#nGraph)
* [Intel OpenVINO](#openvino)
* [Android NNAPI](#Android)
@@ -203,15 +203,12 @@ Dockerfile instructions are available [here](./dockerfiles#tensorrt)

---

-### MKLDNN and MKLML
-See more information on MKL-DNN and MKL-ML [here](./docs/execution_providers/MKL-DNN-ExecutionProvider.md).
+### DNNL and MKLML
+See more information on DNNL and MKL-ML [here](./docs/execution_providers/DNNL-ExecutionProvider.md).

#### Build Instructions
##### Linux
-MKL-DNN: `./build.sh --use_mkldnn`

-MKL-DNN built with dependency on MKL small libraries: `./build.sh --use_mkldnn --use_mklml`

+DNNL: `./build.sh --use_dnnl`
---
@@ -58,7 +58,7 @@ Currently ONNX Runtime supports the following accelerators:

* NVIDIA CUDA
* Intel MKL-ML
-* [Intel MKL-DNN](./docs/execution_providers/MKL-DNN-ExecutionProvider.md) - [subgraph optimization](./docs/execution_providers/MKL-DNN-Subgraphs.md)
+* [Intel DNNL](./docs/execution_providers/MKL-DNN-ExecutionProvider.md) - [subgraph optimization](./docs/execution_providers/MKL-DNN-Subgraphs.md)
* [Intel nGraph](./docs/execution_providers/nGraph-ExecutionProvider.md)
* [NVIDIA TensorRT](./docs/execution_providers/TensorRT-ExecutionProvider.md)
* [Intel OpenVINO](./docs/execution_providers/OpenVINO-ExecutionProvider.md)
@@ -610,7 +610,7 @@ Exhibit B - "Incompatible With Secondary Licenses" Notice

_____

-intel/mkl-dnn
+intel/dnnl

Copyright 2016-2018 Intel Corporation
@@ -52,8 +52,8 @@ option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF)

option(onnxruntime_USE_NSYNC "Build with NSYNC support. This option only takes effect on Linux" OFF)
option(onnxruntime_USE_EIGEN_FOR_BLAS "Use eign for blas" ON)
option(onnxruntime_USE_NNAPI "Build with DNNLibrary for Android NNAPI support" OFF)
-option(onnxruntime_USE_MKLDNN "Build with MKL-DNN support" OFF)
-option(onnxruntime_USE_MKLML "Build MKL-DNN with MKL-ML binary dependency" OFF)
+option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
+option(onnxruntime_USE_MKLML "Build DNNL with MKL-ML binary dependency" OFF)
option(onnxruntime_USE_GEMMLOWP "Build with gemmlowp for quantized gemm" OFF)
option(onnxruntime_USE_AUTOML "Build AutoML support" ON)
option(onnxruntime_USE_NGRAPH "Build with nGraph support" OFF)
@@ -327,7 +327,7 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DGSL_UNEN

include(eigen)
#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn, jemalloc,
-# mkldnn/mklml, openblas, onnxruntime_codegen_tvm, tvm, nnvm_compiler and pthread
+# dnnl/mklml, openblas, onnxruntime_codegen_tvm, tvm, nnvm_compiler and pthread
# pthread is always at the last
set(onnxruntime_EXTERNAL_LIBRARIES onnx onnx_proto protobuf::libprotobuf re2)
@@ -346,8 +346,8 @@ if (onnxruntime_USE_ACL)

endif()

# MKLML
-if (onnxruntime_USE_MKLDNN OR onnxruntime_USE_MKLML)
-include(mkldnn)
+if (onnxruntime_USE_DNNL OR onnxruntime_USE_MKLML)
+include(dnnl)
endif()

# TVM
@@ -537,10 +537,10 @@ if (onnxruntime_USE_MKLML)

link_directories(${MKLML_LIB_DIR})
endif()

-if (onnxruntime_USE_MKLDNN)
-list(APPEND onnxruntime_EXTERNAL_LIBRARIES mkldnn)
-list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES project_mkldnn)
-link_directories(${MKLDNN_LIB_DIR})
+if (onnxruntime_USE_DNNL)
+list(APPEND onnxruntime_EXTERNAL_LIBRARIES dnnl)
+list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES project_dnnl)
+link_directories(${DNNL_LIB_DIR})
endif()

if (onnxruntime_USE_NGRAPH)
@@ -0,0 +1,86 @@ (new file, all lines added)

include (ExternalProject)

set(DNNL_URL https://github.com/intel/mkl-dnn.git)
# If DNNL_TAG is updated, check if MKLML_VERSION and platform.cmake.patch need to be updated.
set(DNNL_TAG v1.1.1)
set(MKLML_VERSION 2019.0.5.20190502)

if(WIN32)
set(MKLML_OS_VERSION_STR "win")
set(MKLML_FILE_EXTENSION "zip")
set(DNNL_SHARED_LIB dnnl.dll)
set(DNNL_IMPORT_LIB dnnl.lib)
if(onnxruntime_USE_MKLML)
# Windows-only updated MKLML binary which contains fix for thread cleanup hang.
set(MKLML_VERSION 2020.0.20190813)
set(MKLML_SHARED_LIB mklml.dll)
set(MKLML_IMPORT_LIB mklml.lib)
set(IOMP5MD_SHARED_LIB libiomp5md.dll)
set(IOMP5MD_IMPORT_LIB libiomp5md.lib)
endif()
else()
set(MKLML_FILE_EXTENSION "tgz")
if (APPLE)
set(DNNL_SHARED_LIB libdnnl.1.dylib)
set(MKLML_OS_VERSION_STR "mac")
else()
set(DNNL_SHARED_LIB libdnnl.so.1)
set(MKLML_OS_VERSION_STR "lnx")
endif()
if(onnxruntime_USE_MKLML)
set(MKLML_SHARED_LIB libmklml_intel.so)
set(IOMP5MD_SHARED_LIB libiomp5.so)
endif()
endif()

if (onnxruntime_USE_MKLML)
set(MKLDNN_VERSION_SHORT v0.20)
set(MKLML_URL https://github.com/intel/mkl-dnn/releases/download/${MKLDNN_VERSION_SHORT}/mklml_${MKLML_OS_VERSION_STR}_${MKLML_VERSION}.${MKLML_FILE_EXTENSION})

ExternalProject_Add(project_mklml
PREFIX mklml
URL ${MKLML_URL}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND "" )

set(MKML_DIR ${CMAKE_CURRENT_BINARY_DIR}/mklml/src/project_mklml)
set(MKLML_INCLUDE_DIR "${MKML_DIR}/include")
set(MKLML_LIB_DIR "${MKML_DIR}/lib")
link_directories(${MKLML_LIB_DIR})
endif()

if (onnxruntime_USE_DNNL)
set(DNNL_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/dnnl/src/dnnl/src)
set(DNNL_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/dnnl/install)
set(DNNL_LIB_DIR ${DNNL_INSTALL}/${CMAKE_INSTALL_LIBDIR})
if(WIN32)
set(DNNL_DLL_PATH ${DNNL_INSTALL}/${CMAKE_INSTALL_BINDIR}/${DNNL_SHARED_LIB})
else()
set(DNNL_DLL_PATH ${DNNL_LIB_DIR}/${DNNL_SHARED_LIB})
endif()
set(DNNL_INCLUDE_DIR ${DNNL_INSTALL}/include)
set(DNNL_CMAKE_EXTRA_ARGS)
# set(DNNL_PATCH_COMMAND git apply ${CMAKE_SOURCE_DIR}/patches/mkldnn/constexpr.patch)
# discard prior changes due to patching in mkldnn source to unblock incremental builds.
# set(MKLDNN_PATCH_DISCARD_COMMAND cd ${DNNL_SOURCE} && git checkout -- .)
# if(NOT onnxruntime_BUILD_FOR_NATIVE_MACHINE)
# pre-v1.0
# list(APPEND DNNL_CMAKE_EXTRA_ARGS "-DARCH_OPT_FLAGS=")
# v1.0
# list(APPEND DNNL_CMAKE_EXTRA_ARGS "-DDNNL_ARCH_OPT_FLAGS=")
# endif()
ExternalProject_Add(project_dnnl
PREFIX dnnl
GIT_REPOSITORY ${DNNL_URL}
GIT_TAG ${DNNL_TAG}
# PATCH_COMMAND ${MKLDNN_PATCH_DISCARD_COMMAND} COMMAND ${DNNL_PATCH_COMMAND}
SOURCE_DIR ${DNNL_SOURCE}
CMAKE_ARGS -DDNNL_PRODUCT_BUILD_MODE=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${DNNL_INSTALL} -DMKLROOT=${MKML_DIR}
)
link_directories(${DNNL_LIB_DIR})
#if (onnxruntime_USE_MKLML)
# add_dependencies(project_dnnl project_mklml)
#endif()
endif()
@@ -1,86 +0,0 @@ (deleted file, all lines removed)

include (ExternalProject)

set(MKLDNN_URL https://github.com/intel/mkl-dnn.git)
# If MKLDNN_TAG is updated, check if MKLML_VERSION and platform.cmake.patch need to be updated.
set(MKLDNN_TAG v1.0.2)
set(MKLML_VERSION 2019.0.5.20190502)

if(WIN32)
set(MKLML_OS_VERSION_STR "win")
set(MKLML_FILE_EXTENSION "zip")
set(MKLDNN_SHARED_LIB mkldnn.dll)
set(MKLDNN_IMPORT_LIB mkldnn.lib)
if(onnxruntime_USE_MKLML)
# Windows-only updated MKLML binary which contains fix for thread cleanup hang.
set(MKLML_VERSION 2020.0.20190813)
set(MKLML_SHARED_LIB mklml.dll)
set(MKLML_IMPORT_LIB mklml.lib)
set(IOMP5MD_SHARED_LIB libiomp5md.dll)
set(IOMP5MD_IMPORT_LIB libiomp5md.lib)
endif()
else()
set(MKLML_FILE_EXTENSION "tgz")
if (APPLE)
set(MKLDNN_SHARED_LIB libmkldnn.1.dylib)
set(MKLML_OS_VERSION_STR "mac")
else()
set(MKLDNN_SHARED_LIB libmkldnn.so.1)
set(MKLML_OS_VERSION_STR "lnx")
endif()
if(onnxruntime_USE_MKLML)
set(MKLML_SHARED_LIB libmklml_intel.so)
set(IOMP5MD_SHARED_LIB libiomp5.so)
endif()
endif()

if (onnxruntime_USE_MKLML)
set(MKLDNN_VERSION_SHORT v0.20)
set(MKLML_URL https://github.com/intel/mkl-dnn/releases/download/${MKLDNN_VERSION_SHORT}/mklml_${MKLML_OS_VERSION_STR}_${MKLML_VERSION}.${MKLML_FILE_EXTENSION})

ExternalProject_Add(project_mklml
PREFIX mklml
URL ${MKLML_URL}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND "" )

set(MKML_DIR ${CMAKE_CURRENT_BINARY_DIR}/mklml/src/project_mklml)
set(MKLML_INCLUDE_DIR "${MKML_DIR}/include")
set(MKLML_LIB_DIR "${MKML_DIR}/lib")
link_directories(${MKLML_LIB_DIR})
endif()

if (onnxruntime_USE_MKLDNN)
set(MKLDNN_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/mkl-dnn/src/mkl-dnn/src)
set(MKLDNN_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/mkl-dnn/install)
set(MKLDNN_LIB_DIR ${MKLDNN_INSTALL}/${CMAKE_INSTALL_LIBDIR})
if(WIN32)
set(MKLDNN_DLL_PATH ${MKLDNN_INSTALL}/${CMAKE_INSTALL_BINDIR}/${MKLDNN_SHARED_LIB})
else()
set(MKLDNN_DLL_PATH ${MKLDNN_LIB_DIR}/${MKLDNN_SHARED_LIB})
endif()
set(MKLDNN_INCLUDE_DIR ${MKLDNN_INSTALL}/include)
set(MKLDNN_CMAKE_EXTRA_ARGS)
set(MKLDNN_PATCH_COMMAND git apply ${CMAKE_SOURCE_DIR}/patches/mkldnn/constexpr.patch)
# discard prior changes due to patching in mkldnn source to unblock incremental builds.
set(MKLDNN_PATCH_DISCARD_COMMAND cd ${MKLDNN_SOURCE} && git checkout -- .)
if(NOT onnxruntime_BUILD_FOR_NATIVE_MACHINE)
# pre-v1.0
list(APPEND MKLDNN_CMAKE_EXTRA_ARGS "-DARCH_OPT_FLAGS=")
# v1.0
list(APPEND MKLDNN_CMAKE_EXTRA_ARGS "-DMKLDNN_ARCH_OPT_FLAGS=")
endif()
ExternalProject_Add(project_mkldnn
PREFIX mkl-dnn
GIT_REPOSITORY ${MKLDNN_URL}
GIT_TAG ${MKLDNN_TAG}
PATCH_COMMAND ${MKLDNN_PATCH_DISCARD_COMMAND} COMMAND ${MKLDNN_PATCH_COMMAND}
SOURCE_DIR ${MKLDNN_SOURCE}
CMAKE_ARGS -DMKLDNN_PRODUCT_BUILD_MODE=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL} -DMKLROOT=${MKML_DIR} ${MKLDNN_CMAKE_EXTRA_ARGS}
)
link_directories(${MKLDNN_LIB_DIR})
if (onnxruntime_USE_MKLML)
add_dependencies(project_mkldnn project_mklml)
endif()
endif()
@@ -58,7 +58,7 @@ target_link_libraries(onnxruntime PRIVATE

onnxruntime_session
${onnxruntime_libs}
${PROVIDERS_CUDA}
-${PROVIDERS_MKLDNN}
+${PROVIDERS_DNNL}
${PROVIDERS_NGRAPH}
${PROVIDERS_NNAPI}
${PROVIDERS_TENSORRT}
@@ -14,8 +14,8 @@ if (onnxruntime_USE_CUDA)

STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "USE_CUDA,")
endif()

-if (onnxruntime_USE_MKLDNN)
-STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "USE_MKLDNN,")
+if (onnxruntime_USE_DNNL)
+STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "USE_DNNL,")
endif()

if (onnxruntime_USE_TENSORRT)
@@ -40,9 +40,9 @@ file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS

"${ONNXRUNTIME_ROOT}/core/providers/*.cc"
)

-if(onnxruntime_USE_MKLDNN)
-set(PROVIDERS_MKLDNN onnxruntime_providers_mkldnn)
-list(APPEND ONNXRUNTIME_PROVIDER_NAMES mkldnn)
+if(onnxruntime_USE_DNNL)
+set(PROVIDERS_DNNL onnxruntime_providers_dnnl)
+list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl)
endif()
if(onnxruntime_USE_NGRAPH)
set(PROVIDERS_NGRAPH onnxruntime_providers_ngraph)
@@ -177,20 +177,20 @@ if (onnxruntime_USE_CUDA)

endif()
endif()

-if (onnxruntime_USE_MKLDNN)
-file(GLOB_RECURSE onnxruntime_providers_mkldnn_cc_srcs CONFIGURE_DEPENDS
-"${ONNXRUNTIME_ROOT}/core/providers/mkldnn/*.h"
-"${ONNXRUNTIME_ROOT}/core/providers/mkldnn/*.cc"
+if (onnxruntime_USE_DNNL)
+file(GLOB_RECURSE onnxruntime_providers_dnnl_cc_srcs CONFIGURE_DEPENDS
+"${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.h"
+"${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.cc"
)

-source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_mkldnn_cc_srcs})
-add_library(onnxruntime_providers_mkldnn ${onnxruntime_providers_mkldnn_cc_srcs})
-onnxruntime_add_include_to_target(onnxruntime_providers_mkldnn onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf)
-add_dependencies(onnxruntime_providers_mkldnn ${onnxruntime_EXTERNAL_DEPENDENCIES})
-set_target_properties(onnxruntime_providers_mkldnn PROPERTIES FOLDER "ONNXRuntime")
-target_include_directories(onnxruntime_providers_mkldnn PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${MKLDNN_INCLUDE_DIR})
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/mkldnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
-set_target_properties(onnxruntime_providers_mkldnn PROPERTIES LINKER_LANGUAGE CXX)
+source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dnnl_cc_srcs})
+add_library(onnxruntime_providers_dnnl ${onnxruntime_providers_dnnl_cc_srcs})
+onnxruntime_add_include_to_target(onnxruntime_providers_dnnl onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf)
+add_dependencies(onnxruntime_providers_dnnl ${onnxruntime_EXTERNAL_DEPENDENCIES})
+set_target_properties(onnxruntime_providers_dnnl PROPERTIES FOLDER "ONNXRuntime")
+target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${DNNL_INCLUDE_DIR})
+install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dnnl DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
+set_target_properties(onnxruntime_providers_dnnl PROPERTIES LINKER_LANGUAGE CXX)
endif()

if (onnxruntime_USE_TENSORRT)
@@ -50,8 +50,8 @@ if(onnxruntime_PYBIND_EXPORT_OPSCHEMA)

target_compile_definitions(onnxruntime_pybind11_state PRIVATE onnxruntime_PYBIND_EXPORT_OPSCHEMA)
endif()

-if (onnxruntime_USE_MKLDNN)
-target_compile_definitions(onnxruntime_pybind11_state PRIVATE USE_MKLDNN=1)
+if (onnxruntime_USE_DNNL)
+target_compile_definitions(onnxruntime_pybind11_state PRIVATE USE_DNNL=1)
endif()

target_include_directories(onnxruntime_pybind11_state PRIVATE ${ONNXRUNTIME_ROOT} ${PYTHON_INCLUDE_DIR} ${NUMPY_INCLUDE_DIR})
@@ -68,7 +68,7 @@ set(onnxruntime_pybind11_state_libs

onnxruntime_session
${onnxruntime_libs}
${PROVIDERS_CUDA}
-${PROVIDERS_MKLDNN}
+${PROVIDERS_DNNL}
${PROVIDERS_TENSORRT}
${PROVIDERS_NGRAPH}
${PROVIDERS_OPENVINO}
@@ -198,10 +198,10 @@ add_custom_command(

$<TARGET_FILE_DIR:${test_data_target}>
)

-if (onnxruntime_USE_MKLDNN)
+if (onnxruntime_USE_DNNL)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
-COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_DLL_PATH}
+COMMAND ${CMAKE_COMMAND} -E copy ${DNNL_DLL_PATH}
$<TARGET_FILE_DIR:${test_data_target}>/onnxruntime/capi/
)
endif()
@@ -202,8 +202,8 @@ if(onnxruntime_USE_CUDA)

list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_cuda)
endif()

-if(onnxruntime_USE_MKLDNN)
-list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_mkldnn)
+if(onnxruntime_USE_DNNL)
+list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_dnnl)
endif()

if(onnxruntime_USE_NGRAPH)
@@ -260,7 +260,7 @@ set(ONNXRUNTIME_TEST_LIBS

${ONNXRUNTIME_INTEROP_TEST_LIBS}
${onnxruntime_libs}
${PROVIDERS_CUDA}
-${PROVIDERS_MKLDNN}
+${PROVIDERS_DNNL}
${PROVIDERS_TENSORRT}
${PROVIDERS_NGRAPH}
${PROVIDERS_OPENVINO}
@@ -314,8 +314,8 @@ onnxruntime_add_include_to_target(onnxruntime_test_utils_for_framework onnxrunti

if (onnxruntime_USE_FULL_PROTOBUF)
target_compile_definitions(onnxruntime_test_utils_for_framework PRIVATE USE_FULL_PROTOBUF=1)
endif()
-if (onnxruntime_USE_MKLDNN)
-target_compile_definitions(onnxruntime_test_utils_for_framework PUBLIC USE_MKLDNN=1)
+if (onnxruntime_USE_DNNL)
+target_compile_definitions(onnxruntime_test_utils_for_framework PUBLIC USE_DNNL=1)
endif()
add_dependencies(onnxruntime_test_utils_for_framework ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_include_directories(onnxruntime_test_utils_for_framework PUBLIC "${TEST_SRC_DIR}/util/include" PRIVATE ${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT})
@@ -329,8 +329,8 @@ onnxruntime_add_include_to_target(onnxruntime_test_utils onnxruntime_framework g

if (onnxruntime_USE_FULL_PROTOBUF)
target_compile_definitions(onnxruntime_test_utils PRIVATE USE_FULL_PROTOBUF=1)
endif()
-if (onnxruntime_USE_MKLDNN)
-target_compile_definitions(onnxruntime_test_utils PUBLIC USE_MKLDNN=1)
+if (onnxruntime_USE_DNNL)
+target_compile_definitions(onnxruntime_test_utils PUBLIC USE_DNNL=1)
endif()
add_dependencies(onnxruntime_test_utils ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/include" PRIVATE ${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT})
@@ -440,11 +440,11 @@ add_custom_command(

${TEST_DATA_SRC}
${TEST_DATA_DES})
if(WIN32)
-if (onnxruntime_USE_MKLDNN)
-list(APPEND onnx_test_libs mkldnn)
+if (onnxruntime_USE_DNNL)
+list(APPEND onnx_test_libs dnnl)
add_custom_command(
TARGET ${test_data_target} POST_BUILD
-COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_DLL_PATH} $<TARGET_FILE_DIR:${test_data_target}>
+COMMAND ${CMAKE_COMMAND} -E copy ${DNNL_DLL_PATH} $<TARGET_FILE_DIR:${test_data_target}>
)
endif()
if (onnxruntime_USE_MKLML)
@@ -18,7 +18,7 @@

<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
-<None Include="$(NativeBuildOutputDir)\mkldnn.dll" Condition="Exists('$(NativeBuildOutputDir)\mkldnn.dll')">
+<None Include="$(NativeBuildOutputDir)\dnnl.dll" Condition="Exists('$(NativeBuildOutputDir)\dnnl.dll')">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
@@ -79,8 +79,8 @@

CopyToOutputDirectory="Always"
Visible="false"
/>
-<None Include="$(NativeBuildOutputDir)\mkldnn.dll"
-Condition="Exists('$(NativeBuildOutputDir)\mkldnn.dll')"
+<None Include="$(NativeBuildOutputDir)\dnnl.dll"
+Condition="Exists('$(NativeBuildOutputDir)\dnnl.dll')"
PackagePath="\runtimes\win-$(TargetArchitecture)\native"
Pack="true"
CopyToOutputDirectory="Always"
@@ -412,9 +412,9 @@ namespace Microsoft.ML.OnnxRuntime

[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_CPU(IntPtr /*(OrtSessionOptions*) */ options, int use_arena);

-#if USE_MKLDNN
+#if USE_DNNL
[DllImport(nativeLib, CharSet = charSet)]
-public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_Mkldnn(IntPtr /*(OrtSessionOptions*) */ options, int use_arena);
+public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_Dnnl(IntPtr /*(OrtSessionOptions*) */ options, int use_arena);
#endif

#if USE_CUDA
@@ -81,10 +81,10 @@ namespace Microsoft.ML.OnnxRuntime

NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CPU(_nativePtr, useArena));
}

-#if USE_MKLDNN
-public void AppendExecutionProvider_Mkldnn(int useArena)
+#if USE_DNNL
+public void AppendExecutionProvider_Dnnl(int useArena)
{
-NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Mkldnn(_nativePtr, useArena));
+NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Dnnl(_nativePtr, useArena));
}
#endif
@@ -31,10 +31,10 @@

<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
-<None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\mkldnn.dll"
+<None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\dnnl.dll"
Condition="'$(PlatformTarget)' == 'x64' AND
-Exists('$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\mkldnn.dll')">
-<Link>mkldnn.dll</Link>
+Exists('$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\dnnl.dll')">
+<Link>dnnl.dll</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
@@ -58,10 +58,10 @@

<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
-<None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x86\native\mkldnn.dll"
+<None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x86\native\dnnl.dll"
Condition="'$(PlatformTarget)' == 'x86' AND
-Exists('$(MSBuildThisFileDirectory)..\..\runtimes\win-x86\native\mkldnn.dll')">
-<Link>mkldnn.dll</Link>
+Exists('$(MSBuildThisFileDirectory)..\..\runtimes\win-x86\native\dnnl.dll')">
+<Link>dnnl.dll</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
@@ -78,8 +78,8 @@ namespace Microsoft.ML.OnnxRuntime.Tests

Assert.Throws<OnnxRuntimeException>(() => { opt.GraphOptimizationLevel = (GraphOptimizationLevel)10; });

opt.AppendExecutionProvider_CPU(1);
-#if USE_MKLDNN
-opt.AppendExecutionProvider_Mkldnn(0);
+#if USE_DNNL
+opt.AppendExecutionProvider_Dnnl(0);
#endif
#if USE_CUDA
opt.AppendExecutionProvider_CUDA(0);
@@ -1106,8 +1106,8 @@ namespace Microsoft.ML.OnnxRuntime.Tests

var entryPointNames = new[]{
"OrtGetApiBase",
"OrtSessionOptionsAppendExecutionProvider_CPU"
-#if USE_MKLDNN
-,"OrtSessionOptionsAppendExecutionProvider_Mkldnn"
+#if USE_DNNL
+,"OrtSessionOptionsAppendExecutionProvider_Dnnl"
#endif
#if USE_CUDA
,"OrtSessionOptionsAppendExecutionProvider_CUDA"
@@ -19,7 +19,7 @@

<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
-<None Include="$(NativeBuildOutputDir)\mkldnn.dll" Condition="Exists('$(NativeBuildOutputDir)\mkldnn.dll')">
+<None Include="$(NativeBuildOutputDir)\dnnl.dll" Condition="Exists('$(NativeBuildOutputDir)\dnnl.dll')">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
@@ -27,14 +27,14 @@ ENV PATH="/usr/local/go/bin:${PATH}"

WORKDIR /
RUN mkdir -p /onnxruntime/build
-RUN python3 /onnxruntime/tools/ci_build/build.py --build_dir /onnxruntime/build --config Release --use_mkldnn --build_server --parallel --cmake_extra_defines ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER)
+RUN python3 /onnxruntime/tools/ci_build/build.py --build_dir /onnxruntime/build --config Release --use_dnnl --build_server --parallel --cmake_extra_defines ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER)


FROM ubuntu:16.04 AS minimal
WORKDIR /onnxruntime/server/
COPY --from=build /onnxruntime/build/Release/onnxruntime_server /onnxruntime/server/
COPY --from=build /onnxruntime/build/Release/libonnxruntime.so.* /lib/
-COPY --from=build /onnxruntime/build/Release/mkl-dnn/install/lib/libmkldnn.so* /lib/
+COPY --from=build /onnxruntime/build/Release/mkl-dnn/install/lib/libdnnl.so* /lib/
RUN apt-get update && apt-get install -y libgomp1
ENTRYPOINT ["/onnxruntime/server/onnxruntime_server"]
@@ -27,7 +27,7 @@ ENV PATH="/usr/local/go/bin:${PATH}"

WORKDIR /
RUN mkdir -p /onnxruntime/build
-RUN python3 /onnxruntime/tools/ci_build/build.py --build_dir /onnxruntime/build --config Release --use_mkldnn --use_mklml \
+RUN python3 /onnxruntime/tools/ci_build/build.py --build_dir /onnxruntime/build --config Release --use_dnnl --use_mklml \
--build_server --parallel --cmake_extra_defines ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER)
@@ -36,7 +36,7 @@ WORKDIR /onnxruntime/server/

RUN apt-get update && apt-get install -y libgomp1
COPY --from=build /onnxruntime/build/Release/onnxruntime_server /onnxruntime/server/
COPY --from=build /onnxruntime/build/Release/libonnxruntime.so.* /lib/
-COPY --from=build /onnxruntime/build/Release/mkl-dnn/install/lib/libmkldnn.so* /lib/
+COPY --from=build /onnxruntime/build/Release/mkl-dnn/install/lib/libdnnl.so* /lib/
COPY --from=build /onnxruntime/build/Release/mklml/src/project_mklml/lib/*.so* /lib/
ENTRYPOINT ["/onnxruntime/server/onnxruntime_server"]
@@ -15,8 +15,8 @@ Examples:

Provider](../onnxruntime/core/providers/cpu/cpu_execution_provider.h)
* [CUDA Execution
Provider](../onnxruntime/core/providers/cuda/cuda_execution_provider.h)
-* [MKL-DNN Execution
-Provider](../onnxruntime/core/providers/mkldnn/mkldnn_execution_provider.h)
+* [DNNL Execution
+Provider](../onnxruntime/core/providers/dnnl/dnnl_execution_provider.h)


# Using the execution provider
@@ -5,7 +5,7 @@

* Creating an InferenceSession from an on-disk model file and a set of SessionOptions.
* Registering customized loggers.
* Registering customized allocators.
-* Registering predefined providers and set the priority order. ONNXRuntime has a set of predefined execution providers, like CUDA, MKLDNN. User can register providers to their InferenceSession. The order of registration indicates the preference order as well.
+* Registering predefined providers and set the priority order. ONNXRuntime has a set of predefined execution providers, like CUDA, DNNL. User can register providers to their InferenceSession. The order of registration indicates the preference order as well.
* Running a model with inputs. These inputs must be in CPU memory, not GPU. If the model has multiple outputs, user can specify which outputs they want.
* Converting an in-memory ONNX Tensor encoded in protobuf format to a pointer that can be used as model input.
* Setting the thread pool size for each session.
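The registration-order behaviour described in the list above can be illustrated with a short, hypothetical snippet. It is only a sketch: it assumes an `OrtSessionOptions* options` has already been created through the usual C API calls, and it uses only the two provider-append entry points that appear elsewhere in this change (return-status checks are omitted for brevity).

```c++
// Hypothetical sketch: append providers in priority order.
// DNNL is preferred; the default CPU provider is the fallback.
// `options` is assumed to be a valid OrtSessionOptions* created elsewhere,
// and each call returns an OrtStatus* that real code should check.
OrtSessionOptionsAppendExecutionProvider_Dnnl(options, /*use_arena=*/1);
OrtSessionOptionsAppendExecutionProvider_CPU(options, /*use_arena=*/1);
```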
@@ -35,7 +35,7 @@ Options:

-r [repeat]: Specifies the number of times to repeat
-v: verbose
-n [test_case_name]: Specifies a single test case to run.
--e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'mkldnn' or 'tensorrt'. Default: 'cpu'.
+-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'dnnl' or 'tensorrt'. Default: 'cpu'.
-x: Use parallel executor, default (without -x): sequential executor.
-h: help
@@ -20,7 +20,7 @@ Official Python packages on Pypi only support the default CPU (MLAS) and default

For example:

-`MKLDNN: ./build.sh --config RelWithDebInfo --use_mkldnn --build_wheel --parallel`
+`DNNL: ./build.sh --config RelWithDebInfo --use_dnnl --build_wheel --parallel`

` CUDA: ./build.sh --config RelWithDebInfo --use_cuda --build_wheel --parallel`
@@ -30,11 +30,11 @@ Official release (nuget package) supports default (MLAS) and MKL-ML for CPU, and

For example:

-`MKLDNN: ./build.sh --config RelWithDebInfo --use_mkldnn --build_csharp --parallel`
+`DNNL: ./build.sh --config RelWithDebInfo --use_dnnl --build_csharp --parallel`

`CUDA: ./build.sh --config RelWithDebInfo --use_cuda --build_csharp --parallel`

-In order to use MKLDNN, nGraph, CUDA, or TensorRT execution provider, you need to call the C API OrtSessionOptionsAppendExecutionProvider. Here is an example for the CUDA execution provider:
+In order to use DNNL, nGraph, CUDA, or TensorRT execution provider, you need to call the C API OrtSessionOptionsAppendExecutionProvider. Here is an example for the CUDA execution provider:

C API Example:
```c
@@ -450,7 +450,7 @@

| |

-## Operators implemented by MKLDNNExecutionProvider
+## Operators implemented by DNNLExecutionProvider

| Op Name | Parameters | OpSet Version | Types Supported |
|---------|------------|---------------|-----------------|
@@ -78,7 +78,7 @@ To achieve the best performance on a growing set of compute targets across cloud

|Supported|Future|
|---|---|
|MLAS (Microsoft Linear Algebra Subprograms)|Android NN API (in progress)|
-|Intel MKL-DNN / MKL-ML|ARM Compute Library (community contribution by NXP, in progress)|
+|Intel DNNL / MKL-ML|ARM Compute Library (community contribution by NXP, in progress)|
|Intel nGraph| |
|NVIDIA CUDA| |
|NVIDIA TensorRT| |
@@ -20,7 +20,7 @@ alias cmake="/usr/bin/cmake -DCMAKE_TOOLCHAIN_FILE=$OECORE_NATIVE_SYSROOT/usr/sh

Confiure ONNX Runtime with ACL support:
```
-cmake ../onnxruntime-arm-upstream/cmake -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc -Donnxruntime_RUN_ONNX_TESTS=OFF -Donnxruntime_GENERATE_TEST_REPORTS=ON -Donnxruntime_DEV_MODE=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 -Donnxruntime_USE_CUDA=OFF -Donnxruntime_USE_NSYNC=OFF -Donnxruntime_CUDNN_HOME= -Donnxruntime_USE_JEMALLOC=OFF -Donnxruntime_ENABLE_PYTHON=OFF -Donnxruntime_BUILD_CSHARP=OFF -Donnxruntime_BUILD_SHARED_LIB=ON -Donnxruntime_USE_EIGEN_FOR_BLAS=ON -Donnxruntime_USE_OPENBLAS=OFF -Donnxruntime_USE_ACL=ON -Donnxruntime_USE_MKLDNN=OFF -Donnxruntime_USE_MKLML=OFF -Donnxruntime_USE_OPENMP=ON -Donnxruntime_USE_TVM=OFF -Donnxruntime_USE_LLVM=OFF -Donnxruntime_ENABLE_MICROSOFT_INTERNAL=OFF -Donnxruntime_USE_BRAINSLICE=OFF -Donnxruntime_USE_NUPHAR=OFF -Donnxruntime_USE_EIGEN_THREADPOOL=OFF -Donnxruntime_BUILD_UNIT_TESTS=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
+cmake ../onnxruntime-arm-upstream/cmake -DONNX_CUSTOM_PROTOC_EXECUTABLE=/usr/bin/protoc -Donnxruntime_RUN_ONNX_TESTS=OFF -Donnxruntime_GENERATE_TEST_REPORTS=ON -Donnxruntime_DEV_MODE=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 -Donnxruntime_USE_CUDA=OFF -Donnxruntime_USE_NSYNC=OFF -Donnxruntime_CUDNN_HOME= -Donnxruntime_USE_JEMALLOC=OFF -Donnxruntime_ENABLE_PYTHON=OFF -Donnxruntime_BUILD_CSHARP=OFF -Donnxruntime_BUILD_SHARED_LIB=ON -Donnxruntime_USE_EIGEN_FOR_BLAS=ON -Donnxruntime_USE_OPENBLAS=OFF -Donnxruntime_USE_ACL=ON -Donnxruntime_USE_DNNL=OFF -Donnxruntime_USE_MKLML=OFF -Donnxruntime_USE_OPENMP=ON -Donnxruntime_USE_TVM=OFF -Donnxruntime_USE_LLVM=OFF -Donnxruntime_ENABLE_MICROSOFT_INTERNAL=OFF -Donnxruntime_USE_BRAINSLICE=OFF -Donnxruntime_USE_NUPHAR=OFF -Donnxruntime_USE_EIGEN_THREADPOOL=OFF -Donnxruntime_BUILD_UNIT_TESTS=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
```

Build ONNX Runtime library, test and performance application:
@@ -1,13 +1,13 @@

-# MKL-DNN Execution Provider
+# DNNL Execution Provider

-Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) is an open-source performance library for deep-learning applications. The library accelerates deep-learning applications and frameworks on Intel® architecture and Intel® Processor Graphics Architecture. Intel MKL-DNN contains vectorized and threaded building blocks that you can use to implement deep neural networks (DNN) with C and C++ interfaces. For more, please see the MKL-DNN documentation on (https://intel.github.io/mkl-dnn/).
+Intel® Math Kernel Library for Deep Neural Networks (Intel® DNNL) is an open-source performance library for deep-learning applications. The library accelerates deep-learning applications and frameworks on Intel® architecture and Intel® Processor Graphics Architecture. Intel DNNL contains vectorized and threaded building blocks that you can use to implement deep neural networks (DNN) with C and C++ interfaces. For more, please see the DNNL documentation on (https://intel.github.io/mkl-dnn/).

-Intel and Microsoft have developed MKL-DNN Execution Provider (EP) for ONNX Runtime to accelerate performance of ONNX Runtime using Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) optimized primitives.
+Intel and Microsoft have developed DNNL Execution Provider (EP) for ONNX Runtime to accelerate performance of ONNX Runtime using Intel® Math Kernel Library for Deep Neural Networks (Intel® DNNL) optimized primitives.

-For information on how MKL-DNN optimizes subgraphs, see [Subgraph Optimization](./MKL-DNN-Subgraphs.md)
+For information on how DNNL optimizes subgraphs, see [Subgraph Optimization](./MKL-DNN-Subgraphs.md)

## Build
-For build instructions, please see the [BUILD page](../../BUILD.md#mkldnn-and-mklml).
+For build instructions, please see the [BUILD page](../../BUILD.md#dnnl-and-mklml).

## Supported OS
* Ubuntu 16.04
@@ -17,18 +17,18 @@ For build instructions, please see the [BUILD page](../../BUILD.md#mkldnn-and-mk

## Supported backend
* CPU

-## Using the MKL-DNN Execution Provider
+## Using the DNNL Execution Provider
### C/C++
-The MKLDNNExecutionProvider execution provider needs to be registered with ONNX Runtime to enable in the inference session.
+The DNNLExecutionProvider execution provider needs to be registered with ONNX Runtime to enable in the inference session.
```
InferenceSession session_object{so};
-session_object.RegisterExecutionProvider(std::make_unique<::onnxruntime:: MKLDNNExecutionProvider >());
+session_object.RegisterExecutionProvider(std::make_unique<::onnxruntime:: DNNLExecutionProvider >());
status = session_object.Load(model_file_name);
```
The C API details are [here](../C_API.md#c-api).

### Python
-When using the python wheel from the ONNX Runtime built with MKL-DNN execution provider, it will be automatically prioritized over the CPU execution provider. Python APIs details are [here](https://aka.ms/onnxruntime-python).
+When using the python wheel from the ONNX Runtime built with DNNL execution provider, it will be automatically prioritized over the CPU execution provider. Python APIs details are [here](https://aka.ms/onnxruntime-python).

## Performance Tuning
For performance tuning, please see guidance on this page: [ONNX Runtime Perf Tuning](../ONNX_Runtime_Perf_Tuning.md)
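The registration fragment in the updated documentation can be read as part of a slightly larger sketch. This is an illustration only, not part of the change: the surrounding `so` (a SessionOptions) and `model_file_name` variables, and the assumption that the provider can be constructed with a default-constructed `DNNLExecutionProviderInfo`, are hypothetical.

```c++
// Minimal sketch of the registration pattern shown above (assumptions noted in comments).
onnxruntime::InferenceSession session_object{so};  // `so` assumed to be a prepared SessionOptions
session_object.RegisterExecutionProvider(
    std::make_unique<onnxruntime::DNNLExecutionProvider>(
        onnxruntime::DNNLExecutionProviderInfo{}));  // default provider info assumed here
auto status = session_object.Load(model_file_name);  // `model_file_name` assumed to be an ONNX model path
// Nodes the DNNL EP claims run on DNNL; everything else falls back to the CPU EP.
```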
@@ -1,25 +1,25 @@

# Subgraph Optimization

-MKL-DNN uses blocked layout (example: nhwc with channels blocked by 16 – nChw16c) to take advantage of vector operations using AVX512. To get best performance, we avoid reorders (example. Nchw16c to nchw) and propagate blocked layout to next primitive.
+DNNL uses blocked layout (example: nhwc with channels blocked by 16 – nChw16c) to take advantage of vector operations using AVX512. To get best performance, we avoid reorders (example. Nchw16c to nchw) and propagate blocked layout to next primitive.

Subgraph optimization achieves this in the following steps.
1. Parses ONNX Runtime graph and creates an Internal Representation of subgraph..
-2. Subgraph Operator (MklDnnFunKernel) iterates through MKL-DNN nodes and creates a vector MKL-DNN Kernels
-3. Compute Function of MklDnnFunKernel iterates and binds data to MKL-DNN primitives in the vector and submits vector for execution.
+2. Subgraph Operator (DnnlFunKernel) iterates through DNNL nodes and creates a vector DNNL Kernels
+3. Compute Function of DnnlFunKernel iterates and binds data to DNNL primitives in the vector and submits vector for execution.


## Subgraph (IR) Internal Representation
-MklDnnExecutionProvicer::GetCapability() parses ONNX model graph and creates IR (Internal Representation) of subgraphs of MKL-DNN operators.
-Each subgraph contains a vector MklDnnNodes, inputs, outputs and attributes for all its MklDnnNodes. There can be attributes of same name. So, we prefix attribute names with Node name and its index.
+DnnlExecutionProvicer::GetCapability() parses ONNX model graph and creates IR (Internal Representation) of subgraphs of DNNL operators.
+Each subgraph contains a vector DnnlNodes, inputs, outputs and attributes for all its DnnlNodes. There can be attributes of same name. So, we prefix attribute names with Node name and its index.
Unique id for subgraph is set as an attribute.

-MklDnnNode has an index to its inputs and outputs and pointer to its parent nodes. MklDnnNode directly reads blocked memory from its parent to avoid data reordering.
+DnnlNode has an index to its inputs and outputs and pointer to its parent nodes. DnnlNode directly reads blocked memory from its parent to avoid data reordering.

<p align="left"><img src="images/mkl-dnn_node.png" /></p>


## Subgraph Classes
-Primitive like MklDnnConv, MklDnnPool, etc are derived from MklDnnKernel base class.
+Primitive like DnnlConv, DnnlPool, etc are derived from DnnlKernel base class.

The following UML diagram captures Subgraph classes.
@@ -28,30 +28,30 @@ The following UML diagram captures Subgraph classes.

## Subgraph Execution

-MklDnnExecutionProvicer::Compute() function creates MklDnnFuncKernel and call it’s Compute Function.
+DnnlExecutionProvicer::Compute() function creates DnnlFuncKernel and call it’s Compute Function.


-MklDnnFuncKernel::Compute function creates SubgraphPrimitve pool and add the object to a map.
+DnnlFuncKernel::Compute function creates SubgraphPrimitve pool and add the object to a map.

SubgraphPrimitve constructor calls the following member functions
```
SubgraphPrimitve::CreatePrimitives()
for (auto& mklnode : mklnodes) {
if (mklnode.name == "Conv") {
-kernel.reset(new MklDnnConv());
+kernel.reset(new DnnlConv());
kernels.push_back(kernel);
} else if (mklnode.name == "BatchNormalization-Relu") {
-kernel.reset(new MklDnnBatchNorm());
+kernel.reset(new DnnlBatchNorm());
context_.kernels.push_back(kernel);
} else if (mklnode.name == "MaxPool") {
-kernel.reset(new MklDnnPool());
+kernel.reset(new DnnlPool());
context_.kernels.push_back(kernel);
}
.
.
.
```
-In CreatePrimitives method, we iterate MklDnnNodes and creates MklDnnKernel objects and add MKL-DNN primitive to a vector. It also reads attributes. This is done only once, at first iteration.
+In CreatePrimitives method, we iterate DnnlNodes and creates DnnlKernel objects and add DNNL primitive to a vector. It also reads attributes. This is done only once, at first iteration.

```
SubgraphPrimitve::Compute()
@@ -61,5 +61,5 @@ SubgraphPrimitve::Compute()

stream->submit(net);
```

-In SubgraphPrimitve::Compute() method, we iterate thru MklDnn Kernels and bind input data. Then we submit the vector of Primitives to MKL-DNN stream.
+In SubgraphPrimitve::Compute() method, we iterate thru Dnnl Kernels and bind input data. Then we submit the vector of Primitives to DNNL stream.
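The bind-and-submit flow described above can also be seen in isolation with plain DNNL. The sketch below is not ONNX Runtime code; it is a minimal, hypothetical example of creating one primitive, binding its arguments, and submitting it to a DNNL stream, written against the DNNL 1.x C++ API (the version pinned by DNNL_TAG v1.1.1 in this change).

```c++
#include <dnnl.hpp>
#include <vector>

int main() {
  // Engine and stream: where primitives run and the queue they are submitted to.
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  // A single in-place ReLU primitive over a small fp32 buffer.
  dnnl::memory::dims dims = {1, 8};
  dnnl::memory::desc md(dims, dnnl::memory::data_type::f32, dnnl::memory::format_tag::nc);
  std::vector<float> data(8, -1.0f);
  dnnl::memory src_mem(md, eng, data.data());

  dnnl::eltwise_forward::desc relu_d(dnnl::prop_kind::forward_inference,
                                     dnnl::algorithm::eltwise_relu, md, 0.f, 0.f);
  dnnl::eltwise_forward::primitive_desc relu_pd(relu_d, eng);
  dnnl::eltwise_forward relu(relu_pd);

  // Bind arguments, submit to the stream, and wait for completion.
  relu.execute(strm, {{DNNL_ARG_SRC, src_mem}, {DNNL_ARG_DST, src_mem}});
  strm.wait();
  return 0;
}
```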
@@ -24,7 +24,7 @@ constexpr const char* kMSDmlDomain = "com.microsoft.dml";

constexpr const char* kNGraphDomain = "com.intel.ai";
constexpr const char* kCpuExecutionProvider = "CPUExecutionProvider";
constexpr const char* kCudaExecutionProvider = "CUDAExecutionProvider";
-constexpr const char* kMklDnnExecutionProvider = "MKLDNNExecutionProvider";
+constexpr const char* kDnnlExecutionProvider = "DnnlExecutionProvider";
constexpr const char* kNGraphExecutionProvider = "NGRAPHExecutionProvider";
constexpr const char* kOpenVINOExecutionProvider = "OpenVINOExecutionProvider";
constexpr const char* kNupharExecutionProvider = "NupharExecutionProvider";
@@ -10,7 +10,7 @@ extern "C" {

/**
* \param use_arena zero: false. non-zero: true.
*/
-ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Mkldnn, _In_ OrtSessionOptions* options, int use_arena);
+ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena);

#ifdef __cplusplus
}
@@ -97,7 +97,7 @@ AllocatorPtr GetAllocator(const SessionState& session_state, const OrtMemoryInfo

bool ProviderIsCpuBased(const std::string& provider_type) {
return provider_type == onnxruntime::kCpuExecutionProvider ||
-provider_type == onnxruntime::kMklDnnExecutionProvider ||
+provider_type == onnxruntime::kDnnlExecutionProvider ||
provider_type == onnxruntime::kNGraphExecutionProvider ||
provider_type == onnxruntime::kNupharExecutionProvider ||
provider_type == onnxruntime::kOpenVINOExecutionProvider ||
@@ -70,7 +70,7 @@ static const onnx::TensorProto* GetInitializer(const Graph& graph, const std::st

common::Status MemcpyTransformer::ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const {
for (auto& provider : provider_types_) {
if (provider != onnxruntime::kCpuExecutionProvider &&
-provider != onnxruntime::kMklDnnExecutionProvider &&
+provider != onnxruntime::kDnnlExecutionProvider &&
provider != onnxruntime::kNGraphExecutionProvider &&
provider != onnxruntime::kNupharExecutionProvider &&
provider != onnxruntime::kOpenVINOExecutionProvider &&
@@ -3,28 +3,28 @@

#pragma once
#include "core/common/common.h"
-#include "mkldnn.hpp"
+#include "dnnl.hpp"
#include <unordered_map>
#include <list>

namespace onnxruntime {
-namespace mkl_dnn {
+namespace ort_dnnl {

template <typename T>
-static mkldnn::memory::data_type MklDnnType();
+static dnnl::memory::data_type DnnnType();

// Add more types here as needed.
template <>
-mkldnn::memory::data_type MklDnnType<float>() {
-return mkldnn::memory::data_type::f32;
+dnnl::memory::data_type DnnnType<float>() {
+return dnnl::memory::data_type::f32;
}

-static mkldnn::engine& GetEngine() {
-static mkldnn::engine cpu_engine = mkldnn::engine(mkldnn::engine::kind::cpu, 0);
+static dnnl::engine& GetEngine() {
+static dnnl::engine cpu_engine = dnnl::engine(dnnl::engine::kind::cpu, 0);
return cpu_engine;
}

-static void AddDimsToKey(std::string& key, const mkldnn::memory::dims& dims) {
+static void AddDimsToKey(std::string& key, const dnnl::memory::dims& dims) {
key.append(1, '#');
for (size_t i = 0; i < dims.size(); i++) {
key.append(std::to_string(dims[i]));
@@ -69,5 +69,5 @@ class PrimitivePool {

return map;
}
};
-} // namespace mkl_dnn
+} // namespace ort_dnnl
} // namespace onnxruntime
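The templated type-mapping helper in the header above exists so that memory descriptors can be built generically from C++ element types. The following standalone illustration uses a locally defined helper rather than the one in this header, so the names `ToDnnlType` and `MakeDesc` are assumptions for the sketch, not part of the change.

```c++
#include <dnnl.hpp>

// Local helper for illustration: map a C++ element type to dnnl::memory::data_type.
template <typename T>
dnnl::memory::data_type ToDnnlType();

template <>
dnnl::memory::data_type ToDnnlType<float>() { return dnnl::memory::data_type::f32; }

// Build a memory descriptor for any mapped type without repeating the data-type enum.
template <typename T>
dnnl::memory::desc MakeDesc(const dnnl::memory::dims& dims, dnnl::memory::format_tag tag) {
  return dnnl::memory::desc(dims, ToDnnlType<T>(), tag);
}
```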
@@ -8,22 +8,22 @@

#include "core/framework/allocator.h"
#include "core/framework/compute_capability.h"
#include "core/framework/kernel_registry.h"
-#include "core/providers/mkldnn/subgraph/mkldnn_func_kernel.h"
-#include "mkldnn_execution_provider.h"
-#include "mkldnn_fwd.h"
+#include "core/providers/dnnl/subgraph/dnnl_func_kernel.h"
+#include "dnnl_execution_provider.h"
+#include "dnnl_fwd.h"

namespace onnxruntime {

-constexpr const char* MKLDNN = "MklDnn";
-constexpr const char* MKLDNN_CPU = "MklDnnCpu";
+constexpr const char* DNNL = "Dnnl";
+constexpr const char* DNNL_CPU = "DnnlCpu";

-MKLDNNExecutionProvider::MKLDNNExecutionProvider(const MKLDNNExecutionProviderInfo& info)
-: IExecutionProvider{onnxruntime::kMklDnnExecutionProvider} {
+DNNLExecutionProvider::DNNLExecutionProvider(const DNNLExecutionProviderInfo& info)
+: IExecutionProvider{onnxruntime::kDnnlExecutionProvider} {
DeviceAllocatorRegistrationInfo default_memory_info({OrtMemTypeDefault,
-[](int) { return onnxruntime::make_unique<CPUAllocator>(onnxruntime::make_unique<OrtMemoryInfo>(MKLDNN, OrtAllocatorType::OrtDeviceAllocator)); }, std::numeric_limits<size_t>::max()});
+[](int) { return onnxruntime::make_unique<CPUAllocator>(onnxruntime::make_unique<OrtMemoryInfo>(DNNL, OrtAllocatorType::OrtDeviceAllocator)); }, std::numeric_limits<size_t>::max()});

DeviceAllocatorRegistrationInfo cpu_memory_info({OrtMemTypeCPUOutput,
-[](int) { return onnxruntime::make_unique<CPUAllocator>(onnxruntime::make_unique<OrtMemoryInfo>(MKLDNN_CPU, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeCPUOutput)); }, std::numeric_limits<size_t>::max()});
+[](int) { return onnxruntime::make_unique<CPUAllocator>(onnxruntime::make_unique<OrtMemoryInfo>(DNNL_CPU, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeCPUOutput)); }, std::numeric_limits<size_t>::max()});

if (info.create_arena) {
InsertAllocator(CreateAllocator(default_memory_info));
@@ -38,15 +38,15 @@ MKLDNNExecutionProvider::MKLDNNExecutionProvider(const MKLDNNExecutionProviderIn

}
} // namespace onnxruntime

-MKLDNNExecutionProvider::~MKLDNNExecutionProvider() {
+DNNLExecutionProvider::~DNNLExecutionProvider() {
}

-namespace mkl_dnn {
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kMklDnnExecutionProvider, kOnnxDomain, 7, Gemm);
+namespace ort_dnnl {
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kDnnlExecutionProvider, kOnnxDomain, 7, Gemm);

-void RegisterMKLDNNKernels(KernelRegistry& kernel_registry) {
+void RegisterDNNLKernels(KernelRegistry& kernel_registry) {
static const BuildKernelCreateInfoFn function_table[] = {
-BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kMklDnnExecutionProvider, kOnnxDomain, 7, Gemm)>,
+BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kDnnlExecutionProvider, kOnnxDomain, 7, Gemm)>,
};

for (auto& function_table_entry : function_table) {
@@ -54,25 +54,23 @@ void RegisterMKLDNNKernels(KernelRegistry& kernel_registry) {

}
}

-std::shared_ptr<KernelRegistry> GetMklDnnKernelRegistry() {
+std::shared_ptr<KernelRegistry> GetDnnlKernelRegistry() {
std::shared_ptr<KernelRegistry> kernel_registry = std::make_shared<KernelRegistry>();
-RegisterMKLDNNKernels(*kernel_registry);
+RegisterDNNLKernels(*kernel_registry);
return kernel_registry;
}
-} // namespace mkl_dnn
+} // namespace ort_dnnl

-std::shared_ptr<KernelRegistry> MKLDNNExecutionProvider::GetKernelRegistry() const {
-static std::shared_ptr<KernelRegistry> kernel_registry = onnxruntime::mkl_dnn::GetMklDnnKernelRegistry();
+std::shared_ptr<KernelRegistry> DNNLExecutionProvider::GetKernelRegistry() const {
+static std::shared_ptr<KernelRegistry> kernel_registry = onnxruntime::ort_dnnl::GetDnnlKernelRegistry();
return kernel_registry;
}

-bool MKLDNNExecutionProvider::UseSubgraph(const onnxruntime::GraphViewer& graph_viewer) const {
-// switch between mkldnn-vanilla and mkldnn-subgraph implementation using
-// MKLDNN_SUBGRAPH environment variable
+bool DNNLExecutionProvider::UseSubgraph(const onnxruntime::GraphViewer& graph_viewer) const {
bool use_subgraph = true;

bool FP16_graph = false;
-bool mkldnn_nodes_in_the_graph = false;
+bool dnnl_nodes_in_the_graph = false;
int max_node_index = graph_viewer.MaxNodeIndex();

for (auto node_index = 0; node_index < max_node_index; node_index++) {
@@ -92,18 +90,18 @@ bool MKLDNNExecutionProvider::UseSubgraph(const onnxruntime::GraphViewer& graph_

continue;
}

-auto op_it = mkldnn_ops_.find(node->OpType());
-if (op_it != mkldnn_ops_.end()) {
-mkldnn_nodes_in_the_graph = true;
+auto op_it = dnnl_ops_.find(node->OpType());
+if (op_it != dnnl_ops_.end()) {
+dnnl_nodes_in_the_graph = true;
break;
}
}

-if (FP16_graph || !mkldnn_nodes_in_the_graph) {
+if (FP16_graph || !dnnl_nodes_in_the_graph) {
// FP16 not supported yet.
use_subgraph = false;
} else {
-const char* env = getenv("ORT_MKLDNN_SUBGRAPH");
+const char* env = getenv("ORT_DNNL_SUBGRAPH");
if (env != nullptr) {
if (atoi(env) == 0) {
use_subgraph = false;
@@ -113,9 +111,9 @@ bool MKLDNNExecutionProvider::UseSubgraph(const onnxruntime::GraphViewer& graph_

return use_subgraph;
}

-void MKLDNNExecutionProvider::CreateOrUpdateMklDnnNode(const Node* node,
-std::shared_ptr<mkl_dnn::Subgraph>& subgraph_ptr,
-mkl_dnn::Subgraph::SubgraphVariables& sub_var,
+void DNNLExecutionProvider::CreateOrUpdateDnnlNode(const Node* node,
+std::shared_ptr<ort_dnnl::Subgraph>& subgraph_ptr,
+ort_dnnl::Subgraph::SubgraphVariables& sub_var,
bool fused,
std::map<std::string, size_t>& output_to_source_node_map,
NodeAttributes& subgraph_attributes) const {
@@ -123,29 +121,29 @@ void MKLDNNExecutionProvider::CreateOrUpdateMklDnnNode(const Node* node,

sub_var.outputs.push_back(node->OutputDefs()[0]->Name());

if (!fused) {
-mkl_dnn::MklDnnNode mkldnn_node;
-mkldnn_node.name = node->OpType();
-mkldnn_node.num_inputs = static_cast<int>(node->InputDefs().size());
-mkldnn_node.input_start_index = static_cast<int>(sub_var.inputs.size()) - 1;
-mkldnn_node.node_index = static_cast<int>(subgraph_ptr->mkldnn_nodes.size()) + 1;
+ort_dnnl::DnnlNode dnnl_node;
+dnnl_node.name = node->OpType();
+dnnl_node.num_inputs = static_cast<int>(node->InputDefs().size());
+dnnl_node.input_start_index = static_cast<int>(sub_var.inputs.size()) - 1;
+dnnl_node.node_index = static_cast<int>(subgraph_ptr->dnnl_nodes.size()) + 1;
const auto& node_outputs = node->OutputDefs();
-mkldnn_node.output_name = node_outputs[0]->Name();
+dnnl_node.output_name = node_outputs[0]->Name();
if (node->OpType() == "Conv") {
-mkldnn_node.weight_name = node->InputDefs()[1]->Name();
+dnnl_node.weight_name = node->InputDefs()[1]->Name();
}
for (size_t i = 0; i < node_inputs.size(); i++) {
auto iter = output_to_source_node_map.find(node_inputs[i]->Name());
if (iter != output_to_source_node_map.end())
-mkldnn_node.parent_nodes.push_back(iter->second);
+dnnl_node.parent_nodes.push_back(iter->second);
}
-subgraph_ptr->mkldnn_nodes.push_back(mkldnn_node);
-output_to_source_node_map.insert(std::make_pair(node_outputs[0]->Name(), subgraph_ptr->mkldnn_nodes.size() - 1));
+subgraph_ptr->dnnl_nodes.push_back(dnnl_node);
+output_to_source_node_map.insert(std::make_pair(node_outputs[0]->Name(), subgraph_ptr->dnnl_nodes.size() - 1));
} else {
-subgraph_ptr->mkldnn_nodes.back().num_inputs += static_cast<int>(node->InputDefs().size() - 1);
+subgraph_ptr->dnnl_nodes.back().num_inputs += static_cast<int>(node->InputDefs().size() - 1);
const auto& node_outputs = node->OutputDefs();
-output_to_source_node_map.erase(subgraph_ptr->mkldnn_nodes.back().output_name);
-subgraph_ptr->mkldnn_nodes.back().output_name = node_outputs[0]->Name();
-output_to_source_node_map.insert(std::make_pair(node_outputs[0]->Name(), subgraph_ptr->mkldnn_nodes.size() - 1));
+output_to_source_node_map.erase(subgraph_ptr->dnnl_nodes.back().output_name);
+subgraph_ptr->dnnl_nodes.back().output_name = node_outputs[0]->Name();
+output_to_source_node_map.insert(std::make_pair(node_outputs[0]->Name(), subgraph_ptr->dnnl_nodes.size() - 1));
}

// Add inputs which are not in the outputs vector.
@@ -163,7 +161,7 @@ void MKLDNNExecutionProvider::CreateOrUpdateMklDnnNode(const Node* node,

NodeAttributes attributes = node->GetAttributes();
if (attributes.size() > 0) {
-size_t index = subgraph_ptr->mkldnn_nodes.size();
+size_t index = subgraph_ptr->dnnl_nodes.size();
std::string op_name;
if (fused) {
for (auto iter = node->InputNodesBegin(); iter != node->InputNodesEnd(); ++iter) {
@@ -181,29 +179,27 @@ void MKLDNNExecutionProvider::CreateOrUpdateMklDnnNode(const Node* node,

}
}

-std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapability(
+std::vector<std::unique_ptr<ComputeCapability>> DNNLExecutionProvider::GetCapability(
const onnxruntime::GraphViewer& graph_viewer,
const std::vector<const KernelRegistry*>& kernel_registries) const {
ORT_UNUSED_PARAMETER(kernel_registries);

-// temporary switch to toggle between mkldnn-vanilla and mkldnn-subgraph implementation using
-// ORT_MKLDNN_SUBGRAPH environment variable
if (UseSubgraph(graph_viewer) == false) {
return IExecutionProvider::GetCapability(graph_viewer, kernel_registries);
}

-LOGS_DEFAULT(INFO) << "Using MKL-DNN Subgraph";
+LOGS_DEFAULT(INFO) << "Using DNNL Subgraph";
// use sub-graph implementation
std::vector<std::unique_ptr<ComputeCapability>> result;
-mkl_dnn::Subgraph::SubgraphVariables sub_var;
-std::shared_ptr<mkl_dnn::Subgraph> subgraph_ptr;
+ort_dnnl::Subgraph::SubgraphVariables sub_var;
+std::shared_ptr<ort_dnnl::Subgraph> subgraph_ptr;

// We need graph name make PrimitivePool keys unique.
// There are several identical graphs in Model zoo and only differ in
// few attribute values. GetGraphName return graph-name + first-node-output name
std::string graph_name = GetGraphName(graph_viewer);
-subgraph_ptr = onnxruntime::make_unique<mkl_dnn::Subgraph>(
-mkl_dnn::Subgraph(graph_name));
+subgraph_ptr = onnxruntime::make_unique<ort_dnnl::Subgraph>(
+ort_dnnl::Subgraph(graph_name));

// output name to node index map. Using it to find sub-graph end nodes
// if output of a node is not an input to any node in a sub-graph is end node
@ -220,43 +216,43 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
|
||||
if (IsDimensionSupported(node) == false) {
|
||||
node_index++;
|
||||
if (subgraph_ptr->mkldnn_nodes.size() > 0) {
|
||||
if (subgraph_ptr->dnnl_nodes.size() > 0) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<ort_dnnl::Subgraph>(ort_dnnl::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
auto op_it = mkldnn_ops_.find(node->OpType());
|
||||
if (op_it != mkldnn_ops_.end()) {
|
||||
auto op_it = dnnl_ops_.find(node->OpType());
|
||||
if (op_it != dnnl_ops_.end()) {
|
||||
sub_var.subgraph_node_indexes.push_back(node->Index());
|
||||
|
||||
// can we fuse (at mkldnn level) nodes?
|
||||
// can we fuse (at Dnnl level) nodes?
|
||||
bool fused = false;
|
||||
if (sub_var.subgraph_node_indexes.size() > 1 && node->OpType() == "BatchNormalization") {
|
||||
if (subgraph_ptr->mkldnn_nodes.back().name == "Conv") {
|
||||
subgraph_ptr->mkldnn_nodes.back().name += "-BatchNormalization";
|
||||
if (subgraph_ptr->dnnl_nodes.back().name == "Conv") {
|
||||
subgraph_ptr->dnnl_nodes.back().name += "-BatchNormalization";
|
||||
fused = true;
|
||||
}
|
||||
}
|
||||
if (sub_var.subgraph_node_indexes.size() > 1 && node->OpType() == "Relu") {
|
||||
if (subgraph_ptr->mkldnn_nodes.back().name == "Conv-BatchNormalization" || subgraph_ptr->mkldnn_nodes.back().name == "BatchNormalization" || subgraph_ptr->mkldnn_nodes.back().name == "Conv") {
|
||||
subgraph_ptr->mkldnn_nodes.back().name += "-Relu";
|
||||
if (subgraph_ptr->dnnl_nodes.back().name == "Conv-BatchNormalization" || subgraph_ptr->dnnl_nodes.back().name == "BatchNormalization" || subgraph_ptr->dnnl_nodes.back().name == "Conv") {
|
||||
subgraph_ptr->dnnl_nodes.back().name += "-Relu";
|
||||
fused = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Create MklDnn node:
|
||||
// Create Dnnl node:
|
||||
// Update inputs, outputs and parent nodes
|
||||
// Collect attributes and modify the key to make it unique
|
||||
CreateOrUpdateMklDnnNode(node, subgraph_ptr, sub_var, fused, output_to_source_node_map, subgraph_attributes);
|
||||
CreateOrUpdateDnnlNode(node, subgraph_ptr, sub_var, fused, output_to_source_node_map, subgraph_attributes);
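// Sketch of the fusion bookkeeping above: a fused op does not add a new DNNL
// node, it appends its type to the previous node's name, so a
// Conv -> BatchNormalization -> Relu chain ends up as one node named
// "Conv-BatchNormalization-Relu". Illustrative helper, not provider code.
#include <string>
static std::string FusedNameSketch(const std::string& previous, const std::string& op_type) {
  return previous + "-" + op_type;  // FusedNameSketch("Conv", "BatchNormalization") == "Conv-BatchNormalization"
}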
|
||||
|
||||
auto temp_index = node_index + 1;
|
||||
if (temp_index < graph_viewer.MaxNodeIndex()) {
|
||||
if (!sub_var.subgraph_node_indexes.empty()) {
|
||||
// if next node is mkldnn node and if it's input is not output of current node
|
||||
// if the next node is a Dnnl node and its input is not the output of the current node:
//   if the next node's input is the output of any node in the sub-graph, continue
//   else break and create the sub-graph
|
||||
|
@ -265,8 +261,8 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
temp_index++;
|
||||
next_node = graph_viewer.GetNode(temp_index);
|
||||
}
|
||||
auto sub_it = mkldnn_ops_.find(next_node->OpType());
|
||||
if (sub_it != mkldnn_ops_.end()) {
|
||||
auto sub_it = dnnl_ops_.find(next_node->OpType());
|
||||
if (sub_it != dnnl_ops_.end()) {
|
||||
const auto& next_node_inputs = next_node->InputDefs();
|
||||
bool input_from_subgraph = true;
|
||||
size_t inputs_count = 1;
|
||||
|
@ -282,7 +278,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
if (input_from_subgraph == false) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_attributes.clear();
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<ort_dnnl::Subgraph>(ort_dnnl::Subgraph(graph_name));
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
}
|
||||
|
@ -290,8 +286,8 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
if (!sub_var.subgraph_node_indexes.empty()) {
|
||||
if (node->GetOutputEdgesCount() > 1) {
|
||||
// If current node has branches
|
||||
// iterate and see if all nodes are mkldnn ops OR
|
||||
// it ends in node with same number of input edges (mkldnn node or cpu node)
|
||||
// iterate and see if all nodes are Dnnl ops OR
|
||||
// it ends in node with same number of input edges (Dnnl node or cpu node)
|
||||
// create sub-graph
|
||||
bool create_subgraph = false;
|
||||
bool break_loop = false;
|
||||
|
@ -303,14 +299,14 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
next_node = graph_viewer.GetNode(temp_index);
|
||||
}
|
||||
if (next_node->GetInputEdgesCount() == node->GetOutputEdgesCount()) {
|
||||
// if all nodes in the branch loop are mkldnn nodes
|
||||
// if all nodes in the branch loop are Dnnl nodes
|
||||
// then continue with adding nodes to sub-graph
|
||||
break_loop = true;
|
||||
}
|
||||
// inner nodes. if inner nodes are not mkldnn nodes
|
||||
// inner nodes. if inner nodes are not Dnnl nodes
|
||||
// create subgraph (inception v2)
|
||||
auto sub_it = mkldnn_ops_.find(next_node->OpType());
|
||||
if (sub_it == mkldnn_ops_.end()) {
|
||||
auto sub_it = dnnl_ops_.find(next_node->OpType());
|
||||
if (sub_it == dnnl_ops_.end()) {
|
||||
// break and create a sub-graph
|
||||
break_loop = true;
|
||||
create_subgraph = true;
|
||||
|
@ -322,7 +318,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
}
|
||||
if (create_subgraph) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<ort_dnnl::Subgraph>(ort_dnnl::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
|
@ -332,7 +328,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
} else {
|
||||
if (!sub_var.subgraph_node_indexes.empty()) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<ort_dnnl::Subgraph>(ort_dnnl::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
|
@ -341,17 +337,17 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
} // graph_viewer node iterator ends
|
||||
if (!sub_var.subgraph_node_indexes.empty()) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<ort_dnnl::Subgraph>(ort_dnnl::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void MKLDNNExecutionProvider::CreateMetaDef(const onnxruntime::GraphViewer& graph_viewer,
|
||||
void DNNLExecutionProvider::CreateMetaDef(const onnxruntime::GraphViewer& graph_viewer,
|
||||
const NodeAttributes& subgraph_attributes,
|
||||
std::shared_ptr<mkl_dnn::Subgraph>& subgraph_ptr,
|
||||
mkl_dnn::Subgraph::SubgraphVariables& sub_var,
|
||||
std::shared_ptr<ort_dnnl::Subgraph>& subgraph_ptr,
|
||||
ort_dnnl::Subgraph::SubgraphVariables& sub_var,
|
||||
std::vector<std::unique_ptr<ComputeCapability>>& result) const {
|
||||
std::string graph_fused_nodes;
|
||||
std::string node_list;
|
||||
|
@ -376,7 +372,7 @@ void MKLDNNExecutionProvider::CreateMetaDef(const onnxruntime::GraphViewer& grap
|
|||
|
||||
auto meta_def = onnxruntime::make_unique<::onnxruntime::IndexedSubGraph::MetaDef>();
|
||||
meta_def->attributes["initializers"] = initializers;
|
||||
meta_def->name = "MkldnnCustomOp" + std::to_string(subgraph_index_);
|
||||
meta_def->name = "DnnlCustomOp" + std::to_string(subgraph_index_);
|
||||
meta_def->domain = kMSDomain;
|
||||
meta_def->since_version = 1;
|
||||
meta_def->status = ONNX_NAMESPACE::EXPERIMENTAL;
|
||||
|
@ -384,7 +380,7 @@ void MKLDNNExecutionProvider::CreateMetaDef(const onnxruntime::GraphViewer& grap
|
|||
meta_def->attributes.insert(subgraph_attributes.begin(), subgraph_attributes.end());
|
||||
|
||||
// Find the end nodes
|
||||
for (auto& mklnode : subgraph_ptr->mkldnn_nodes) {
|
||||
for (auto& mklnode : subgraph_ptr->dnnl_nodes) {
|
||||
auto itr = std::find(sub_var.outputs_as_input_other_node.begin(),
|
||||
sub_var.outputs_as_input_other_node.end(), mklnode.output_name);
|
||||
if (itr == sub_var.outputs_as_input_other_node.end()) {
|
||||
|
@ -407,25 +403,25 @@ void MKLDNNExecutionProvider::CreateMetaDef(const onnxruntime::GraphViewer& grap
|
|||
sub_var.Reset();
|
||||
}
|
||||
|
||||
Status MKLDNNExecutionProvider::Compile(const std::vector<onnxruntime::Node*>& fused_nodes,
|
||||
Status DNNLExecutionProvider::Compile(const std::vector<onnxruntime::Node*>& fused_nodes,
|
||||
std::vector<NodeComputeInfo>& node_compute_funcs) {
|
||||
for (const auto* fused_node : fused_nodes) {
|
||||
auto attributes = fused_node->GetAttributes();
|
||||
NodeComputeInfo compute_info;
|
||||
|
||||
compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
|
||||
auto* p = new onnxruntime::mkl_dnn::MkldnnFuncKernel<float>(context, attributes, this);
|
||||
auto* p = new onnxruntime::ort_dnnl::DnnlFuncKernel<float>(context, attributes, this);
|
||||
*state = p;
|
||||
return 0;
|
||||
};
|
||||
|
||||
compute_info.release_state_func = [](FunctionState state) {
|
||||
if (state)
|
||||
delete static_cast<onnxruntime::mkl_dnn::MkldnnFuncKernel<float>*>(state);
|
||||
delete static_cast<onnxruntime::ort_dnnl::DnnlFuncKernel<float>*>(state);
|
||||
};
|
||||
|
||||
compute_info.compute_func = [](FunctionState state, const OrtCustomOpApi* api, OrtKernelContext* context) {
|
||||
onnxruntime::mkl_dnn::MkldnnFuncKernel<float>* custom_op = reinterpret_cast<mkl_dnn::MkldnnFuncKernel<float>*>(state);
|
||||
onnxruntime::ort_dnnl::DnnlFuncKernel<float>* custom_op = reinterpret_cast<ort_dnnl::DnnlFuncKernel<float>*>(state);
|
||||
return custom_op->Compute(api, context);
|
||||
};
|
||||
|
|
@ -12,32 +12,32 @@
|
|||
#include "core/graph/constants.h"
|
||||
#include "core/framework/allocatormgr.h"
|
||||
#include "core/framework/execution_provider.h"
|
||||
#include "core/providers/mkldnn/subgraph/subgraph.h"
|
||||
#include "core/providers/dnnl/subgraph/subgraph.h"
|
||||
|
||||
namespace mkldnn {
|
||||
namespace dnnl {
|
||||
struct memory;
|
||||
};
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
// Information needed to construct MKL-DNN execution providers.
|
||||
struct MKLDNNExecutionProviderInfo {
|
||||
// Information needed to construct DNNL execution providers.
|
||||
struct DNNLExecutionProviderInfo {
|
||||
bool create_arena{true};
|
||||
|
||||
explicit MKLDNNExecutionProviderInfo(bool use_arena)
|
||||
explicit DNNLExecutionProviderInfo(bool use_arena)
|
||||
: create_arena(use_arena) {}
|
||||
MKLDNNExecutionProviderInfo() = default;
|
||||
DNNLExecutionProviderInfo() = default;
|
||||
};
|
||||
|
||||
// Logical device representation.
|
||||
class MKLDNNExecutionProvider : public IExecutionProvider {
|
||||
class DNNLExecutionProvider : public IExecutionProvider {
|
||||
public:
|
||||
explicit MKLDNNExecutionProvider(const MKLDNNExecutionProviderInfo& info);
|
||||
virtual ~MKLDNNExecutionProvider();
|
||||
explicit DNNLExecutionProvider(const DNNLExecutionProviderInfo& info);
|
||||
virtual ~DNNLExecutionProvider();
|
||||
|
||||
virtual std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> GetWeightsMemoryBuffer(const std::string& weight_key) {
|
||||
std::shared_ptr<dnnl::memory> GetWeightsMemoryBuffer(const std::string& weight_key) {
|
||||
auto iter = weights_mem_map_.find(weight_key);
|
||||
if (iter != weights_mem_map_.end())
|
||||
return iter->second;
|
||||
|
@ -45,7 +45,7 @@ class MKLDNNExecutionProvider : public IExecutionProvider {
|
|||
}
|
||||
|
||||
void SetWeightsMemoryBuffer(const std::string& weight_key,
|
||||
const std::shared_ptr<mkldnn::memory>& filter_dst_mem) {
|
||||
const std::shared_ptr<dnnl::memory>& filter_dst_mem) {
|
||||
weights_mem_map_.insert(std::make_pair(weight_key, filter_dst_mem));
|
||||
}
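// Sketch of the intended first-iteration weight caching built on the two
// helpers above (assuming GetWeightsMemoryBuffer returns an empty pointer on
// a miss); ReorderFilter stands in for whatever produces the reordered
// dnnl::memory and is hypothetical.
#include <functional>
#include <memory>
#include <string>
static std::shared_ptr<dnnl::memory> GetOrCreateWeightsSketch(
    DNNLExecutionProvider& provider, const std::string& weight_key,
    const std::function<std::shared_ptr<dnnl::memory>()>& ReorderFilter) {
  auto cached = provider.GetWeightsMemoryBuffer(weight_key);
  if (!cached) {
    cached = ReorderFilter();                              // reorder once, on the first run
    provider.SetWeightsMemoryBuffer(weight_key, cached);   // reuse on every later run
  }
  return cached;
}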
|
||||
|
||||
|
@ -58,7 +58,7 @@ class MKLDNNExecutionProvider : public IExecutionProvider {
|
|||
reordered_buffers_.push_back(std::move(buffer));
|
||||
}
|
||||
|
||||
std::shared_ptr<mkldnn::memory> GetBiasMemoryBuffer(const std::string& key) {
|
||||
std::shared_ptr<dnnl::memory> GetBiasMemoryBuffer(const std::string& key) {
|
||||
auto iter = bias_mem_map_.find(key);
|
||||
if (iter != bias_mem_map_.end())
|
||||
return iter->second;
|
||||
|
@ -67,7 +67,7 @@ class MKLDNNExecutionProvider : public IExecutionProvider {
|
|||
|
||||
// Conv+BatchNorm fusion. Save scaled bias memory.
|
||||
void SetBiasMemoryBuffer(const std::string& key,
|
||||
const std::shared_ptr<mkldnn::memory>& bias_mem) {
|
||||
const std::shared_ptr<dnnl::memory>& bias_mem) {
|
||||
bias_mem_map_.insert(std::make_pair(key, bias_mem));
|
||||
}
|
||||
|
||||
|
@ -84,14 +84,14 @@ class MKLDNNExecutionProvider : public IExecutionProvider {
|
|||
std::vector<NodeComputeInfo>& node_compute_funcs) override;
|
||||
|
||||
private:
|
||||
// mkldnn weights(filer data) memory blocks from first iteration
|
||||
// dnnl weights (filter data) memory blocks from first iteration
|
||||
// saved by weights name
|
||||
std::unordered_map<std::string, std::shared_ptr<mkldnn::memory>> weights_mem_map_;
|
||||
std::unordered_map<std::string, std::shared_ptr<dnnl::memory>> weights_mem_map_;
|
||||
// Save reordered memory buffers in list so that memory is not freed.
|
||||
std::vector<IAllocatorUniquePtr<void>> reordered_buffers_;
|
||||
|
||||
// conv+batchnorm fusion. normalized bias memory blocks from first iteration
|
||||
std::unordered_map<std::string, std::shared_ptr<mkldnn::memory>> bias_mem_map_;
|
||||
std::unordered_map<std::string, std::shared_ptr<dnnl::memory>> bias_mem_map_;
|
||||
// Conv+BatchNorm fusion bias memory buffer.
|
||||
std::vector<IAllocatorUniquePtr<void>> biass_buffers_;
|
||||
OrtMutex mutex_;
|
||||
|
@ -123,7 +123,7 @@ class MKLDNNExecutionProvider : public IExecutionProvider {
|
|||
|
||||
bool UseSubgraph(const onnxruntime::GraphViewer& graph_viewer) const;
|
||||
|
||||
// Some dimensions are not supported by MKL-DNN
|
||||
// Some dimensions are not supported by DNNL
|
||||
// example: Pool with NumDimensions <= 3 is not supported
|
||||
// Fall back to CPU implementation
|
||||
bool IsDimensionSupported(const Node* node) const {
|
||||
|
@ -140,43 +140,40 @@ class MKLDNNExecutionProvider : public IExecutionProvider {
|
|||
supported = false;
|
||||
}
|
||||
|
||||
if (node->Op()->SinceVersion() == 10)
|
||||
supported = false;
|
||||
|
||||
if (node->OutputDefs().size() > 1)
|
||||
supported = false;
|
||||
}
|
||||
return supported;
|
||||
}
|
||||
|
||||
void CreateOrUpdateMklDnnNode(const Node* node,
|
||||
std::shared_ptr<mkl_dnn::Subgraph>& subgraph_ptr,
|
||||
mkl_dnn::Subgraph::SubgraphVariables& sub_var,
|
||||
void CreateOrUpdateDnnlNode(const Node* node,
|
||||
std::shared_ptr<ort_dnnl::Subgraph>& subgraph_ptr,
|
||||
ort_dnnl::Subgraph::SubgraphVariables& sub_var,
|
||||
bool fused,
|
||||
std::map<std::string, size_t>& output_to_source_node_map,
|
||||
NodeAttributes& subgraph_attributes) const;
|
||||
|
||||
// Create MklDnn node, update inputs, outputs and parent nodes
|
||||
// Create Dnnl node, update inputs, outputs and parent nodes
|
||||
// collect attributes
|
||||
void CreateMetaDef(const onnxruntime::GraphViewer& graph_viewer,
|
||||
const NodeAttributes& subgraph_attributes,
|
||||
std::shared_ptr<mkl_dnn::Subgraph>& subgraph_ptr,
|
||||
mkl_dnn::Subgraph::SubgraphVariables& sub_var,
|
||||
std::shared_ptr<ort_dnnl::Subgraph>& subgraph_ptr,
|
||||
ort_dnnl::Subgraph::SubgraphVariables& sub_var,
|
||||
std::vector<std::unique_ptr<ComputeCapability>>& result) const;
|
||||
|
||||
public:
|
||||
const std::shared_ptr<mkl_dnn::Subgraph> GetMklDnnSubgraph(const std::string& subgraph_id) {
|
||||
const std::shared_ptr<ort_dnnl::Subgraph> GetDnnlSubgraph(const std::string& subgraph_id) {
|
||||
return mkl_subgraphs_[subgraph_id];
|
||||
}
|
||||
|
||||
private:
|
||||
mutable int subgraph_index_ = 0;
|
||||
|
||||
// supported MklDnn Operators
|
||||
std::set<std::string> mkldnn_ops_ = {"Conv", "BatchNormalization", "Relu", "Sum",
|
||||
// supported Dnnl Operators
|
||||
std::set<std::string> dnnl_ops_ = {"Conv", "BatchNormalization", "Relu", "Sum",
|
||||
"AveragePool", "GlobalMaxPool", "GlobalAveragePool", "MaxPool", "LRN"};
|
||||
|
||||
mutable std::unordered_map<std::string, std::shared_ptr<mkl_dnn::Subgraph>> mkl_subgraphs_;
|
||||
mutable std::unordered_map<std::string, std::shared_ptr<ort_dnnl::Subgraph>> mkl_subgraphs_;
|
||||
};
|
||||
|
||||
} // namespace onnxruntime
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
template <typename T>
|
||||
KernelCreateInfo BuildKernelCreateInfo();
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/dnnl/dnnl_provider_factory.h"
#include <atomic>
#include "dnnl_execution_provider.h"
#include "core/session/abi_session_options_impl.h"

using namespace onnxruntime;

namespace onnxruntime {
struct DnnlProviderFactory : IExecutionProviderFactory {
  DnnlProviderFactory(bool create_arena) : create_arena_(create_arena) {}
  ~DnnlProviderFactory() override {}

  std::unique_ptr<IExecutionProvider> CreateProvider() override;

 private:
  bool create_arena_;
};

std::unique_ptr<IExecutionProvider> DnnlProviderFactory::CreateProvider() {
  DNNLExecutionProviderInfo info;
  info.create_arena = create_arena_;
  return onnxruntime::make_unique<DNNLExecutionProvider>(info);
}

std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int device_id) {
  return std::make_shared<onnxruntime::DnnlProviderFactory>(device_id);
  //TODO: This is apparently a bug. The constructor parameter is the create-arena flag, not the device-id
}

}  // namespace onnxruntime

ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena) {
  options->provider_factories.push_back(onnxruntime::CreateExecutionProviderFactory_Dnnl(use_arena));
  return nullptr;
}
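// Minimal usage sketch for the C API entry point defined above. The include
// paths and error handling are assumptions; the second argument is the
// use_arena flag (which, per the TODO above, currently also reaches the
// factory's create-arena parameter).
#include "onnxruntime_c_api.h"
#include "dnnl_provider_factory.h"

int main() {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
  OrtSessionOptions* session_options = nullptr;
  api->CreateSessionOptions(&session_options);
  OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_Dnnl(session_options, 1 /*use_arena*/);
  return status == nullptr ? 0 : 1;  // nullptr means success
}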
|
|
@ -4,18 +4,18 @@
|
|||
#include "gemm.h"
|
||||
#include "core/providers/cpu/math/gemm_helper.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
#include "mkldnn.h"
|
||||
#include "mkldnn.hpp"
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "dnnl.h"
|
||||
#include "dnnl.hpp"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
ONNX_OPERATOR_KERNEL_EX(
|
||||
Gemm,
|
||||
kOnnxDomain,
|
||||
7,
|
||||
kMklDnnExecutionProvider,
|
||||
kDnnlExecutionProvider,
|
||||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Gemm<float>);
|
||||
|
||||
|
@ -29,9 +29,9 @@ Status Gemm<float>::Compute(OpKernelContext* ctx) const {
|
|||
if (!helper.State().IsOK())
|
||||
return helper.State();
|
||||
|
||||
mkldnn::memory::dim M = gsl::narrow_cast<int>(helper.M());
|
||||
mkldnn::memory::dim N = gsl::narrow_cast<int>(helper.N());
|
||||
mkldnn::memory::dim K = gsl::narrow_cast<int>(helper.K());
|
||||
dnnl::memory::dim M = gsl::narrow_cast<int>(helper.M());
|
||||
dnnl::memory::dim N = gsl::narrow_cast<int>(helper.N());
|
||||
dnnl::memory::dim K = gsl::narrow_cast<int>(helper.K());
|
||||
auto Y = ctx->Output(0, TensorShape({M, N}));
|
||||
|
||||
if (M <= 0)
|
||||
|
@ -81,19 +81,19 @@ Status Gemm<float>::Compute(OpKernelContext* ctx) const {
|
|||
}
|
||||
}
|
||||
|
||||
// mkldnn_sgemm expects row major matrices, so no need to swap the operands A and B
|
||||
auto status = mkldnn_sgemm(trans_A_ ? 'T' : 'N',
|
||||
// dnnl_sgemm expects row major matrices, so no need to swap the operands A and B
|
||||
auto status = dnnl_sgemm(trans_A_ ? 'T' : 'N',
|
||||
trans_B_ ? 'T' : 'N',
|
||||
M, N, K,
|
||||
alpha_, X->template Data<float>() , trans_A_ ? M : K,
|
||||
W->template Data<float>(), trans_B_ ? K : N,
|
||||
beta_, Y->template MutableData<float>(), N);
|
||||
if (status == mkldnn_success) {
|
||||
if (status == dnnl_success) {
|
||||
return Status::OK();
|
||||
} else {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "mkldnn_sgemm failed with status: ", status);
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "DNNL_sgemm failed with status: ", status);
|
||||
}
|
||||
}
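// Standalone sketch of the dnnl_sgemm call used above, with row-major operands
// (leading dimensions K, N and N); values are illustrative.
#include <iostream>
#include "dnnl.h"

int main() {
  const int64_t M = 2, N = 2, K = 3;
  float A[M * K] = {1, 2, 3, 4, 5, 6};  // 2x3, row major
  float B[K * N] = {1, 0, 0, 1, 1, 1};  // 3x2, row major
  float C[M * N] = {0, 0, 0, 0};        // 2x2 result
  dnnl_status_t status = dnnl_sgemm('N', 'N', M, N, K,
                                    1.0f, A, K,   // lda = K for non-transposed A
                                    B, N,         // ldb = N for non-transposed B
                                    0.0f, C, N);  // ldc = N
  if (status == dnnl_success)
    std::cout << C[0] << " " << C[1] << " " << C[2] << " " << C[3] << "\n";  // prints: 4 5 10 11
  return 0;
}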
|
||||
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -5,7 +5,7 @@
|
|||
#include "core/framework/op_kernel.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
template <typename T>
|
||||
class Gemm final : public OpKernel {
|
||||
public:
|
||||
|
@ -29,5 +29,5 @@ class Gemm final : public OpKernel {
|
|||
float alpha_;
|
||||
float beta_;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -5,29 +5,29 @@
|
|||
#include "core/util/math.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_kernel.h"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_kernel.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
template <typename T>
|
||||
class MklDnnRelu : public MklDnnKernel {
|
||||
class DnnlRelu : public DnnlKernel {
|
||||
public:
|
||||
MklDnnRelu(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider,
|
||||
DnnlRelu(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider,
|
||||
const NodeAttributes& attributes,
|
||||
const std::string attributes_prefix = "") : MklDnnKernel(node, provider) {
|
||||
const std::string attributes_prefix = "") : DnnlKernel(node, provider) {
|
||||
ORT_UNUSED_PARAMETER(attributes);
|
||||
ORT_UNUSED_PARAMETER(attributes_prefix);
|
||||
}
|
||||
|
||||
void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) {
|
||||
dnnl::engine& cpu_engine,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
|
@ -41,7 +41,7 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
auto xshape = tensor_shape.data();
|
||||
auto xdim = tensor_shape.size();
|
||||
|
||||
mkldnn::memory::dims dims(xdim);
|
||||
dnnl::memory::dims dims(xdim);
|
||||
|
||||
ort_source_format_ = GetSourceFormat(static_cast<int>(xdim));
|
||||
|
||||
|
@ -52,19 +52,19 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
return;
|
||||
}
|
||||
|
||||
mkldnn::memory::dims src_dims(
|
||||
dnnl::memory::dims src_dims(
|
||||
x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
|
||||
ort_source_desc_ = mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
ort_source_desc_ = dnnl::memory::desc(
|
||||
{src_dims}, DnnnType<T>(), ort_source_format_);
|
||||
source_desc_ = ort_source_desc_;
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory({{src_dims}, MklDnnType<T>(), ort_source_format_}, cpu_engine, nullptr));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({src_dims}, DnnnType<T>(), ort_source_format_));
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory({{src_dims}, DnnnType<T>(), ort_source_format_}, cpu_engine, nullptr));
|
||||
} else {
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
|
@ -74,12 +74,12 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
|
||||
primitive_dst_shape_ = TensorShape(x_shape);
|
||||
|
||||
mkldnn::memory::dims dst_dims_mkl(primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
mkldnn::algorithm algo = mkldnn::algorithm::eltwise_relu;
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::eltwise_forward::desc>(
|
||||
mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_inference, algo, *src_md_, 0));
|
||||
relu_fwd_pd_ = onnxruntime::make_unique<mkldnn::eltwise_forward::primitive_desc>(
|
||||
mkldnn::eltwise_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
dnnl::memory::dims dst_dims_mkl(primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
dnnl::algorithm algo = dnnl::algorithm::eltwise_relu;
|
||||
fwd_desc_ = onnxruntime::make_unique<dnnl::eltwise_forward::desc>(
|
||||
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, algo, *src_md_, 0));
|
||||
relu_fwd_pd_ = onnxruntime::make_unique<dnnl::eltwise_forward::primitive_desc>(
|
||||
dnnl::eltwise_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
|
||||
primitive_src_desc_ = relu_fwd_pd_.get()->src_desc();
|
||||
primitive_dst_desc_ = relu_fwd_pd_.get()->dst_desc();
|
||||
|
@ -89,29 +89,29 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder needed. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = std::make_shared<dnnl::memory>(dnnl::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = std::make_shared<dnnl::memory>(dnnl::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// Intermediate node. Use dnnl kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = std::make_shared<dnnl::memory>(dnnl::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
relu_fwd_ = onnxruntime::make_unique<mkldnn::eltwise_forward>(
|
||||
mkldnn::eltwise_forward(*relu_fwd_pd_));
|
||||
relu_fwd_ = onnxruntime::make_unique<dnnl::eltwise_forward>(
|
||||
dnnl::eltwise_forward(*relu_fwd_pd_));
|
||||
|
||||
net.push_back(*relu_fwd_);
|
||||
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{DNNL_ARG_SRC, *src_mem_},
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}});
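// Sketch of how the net / net_args pairs assembled here are presumably run
// later by the fused kernel: one primitive per argument map, in insertion
// order (kept as a comment so it is not mistaken for provider code):
//   dnnl::stream stream(cpu_engine);
//   for (size_t i = 0; i < net.size(); ++i)
//     net[i].execute(stream, net_args[i]);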
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
// reorder is necessary
|
||||
mkldnn::memory::data_type t = MklDnnType<T>();
|
||||
dnnl::memory::data_type t = DnnnType<T>();
|
||||
InitDstReorderOutput(cpu_engine, t, net, net_args);
|
||||
}
|
||||
}
|
||||
|
@ -147,13 +147,13 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<mkldnn::memory> src_mem_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_;
|
||||
|
||||
std::unique_ptr<mkldnn::eltwise_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<mkldnn::eltwise_forward::primitive_desc> relu_fwd_pd_;
|
||||
std::unique_ptr<mkldnn::primitive> relu_fwd_;
|
||||
std::unique_ptr<dnnl::eltwise_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<dnnl::eltwise_forward::primitive_desc> relu_fwd_pd_;
|
||||
std::unique_ptr<dnnl::primitive> relu_fwd_;
|
||||
|
||||
std::unique_ptr<mkldnn::memory::desc> src_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_md_;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -3,14 +3,14 @@
|
|||
|
||||
#pragma once
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_kernel.h"
|
||||
#include "core/providers/mkldnn/memcpy_s.h"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_kernel.h"
|
||||
#include "core/providers/dnnl/memcpy_s.h"
|
||||
#include "core/util/math.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
class BatchNormHelper {
|
||||
public:
|
||||
|
@ -80,12 +80,12 @@ class BatchNormHelper {
|
|||
};
|
||||
|
||||
template <typename T>
|
||||
class MklDnnBatchNorm : public MklDnnKernel {
|
||||
class DnnlBatchNorm : public DnnlKernel {
|
||||
public:
|
||||
explicit MklDnnBatchNorm(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider,
|
||||
explicit DnnlBatchNorm(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider,
|
||||
const NodeAttributes& attributes,
|
||||
const std::string attributes_prefix = "") : MklDnnKernel(node, provider) {
|
||||
const std::string attributes_prefix = "") : DnnlKernel(node, provider) {
|
||||
ReadAttributes(attributes, attributes_prefix);
|
||||
}
|
||||
void ReadAttributes(const NodeAttributes& attributes,
|
||||
|
@ -99,9 +99,9 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
|
||||
void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
dnnl::engine& cpu_engine,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
|
@ -113,22 +113,22 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
ort.ReleaseTensorTypeAndShapeInfo(tensor_info);
|
||||
auto xshape = tensor_shape.data();
|
||||
auto xdim = tensor_shape.size();
|
||||
mkldnn::memory::dims dims(xdim);
|
||||
dnnl::memory::dims dims(xdim);
|
||||
ort_source_format_ = GetSourceFormat(static_cast<int>(xdim));
|
||||
|
||||
x_shape = TensorShape(xshape, xdim);
|
||||
|
||||
mkldnn::memory::dims src_dims(
|
||||
dnnl::memory::dims src_dims(
|
||||
x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
|
||||
ort_source_desc_ = mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
ort_source_desc_ = dnnl::memory::desc(
|
||||
{src_dims}, DnnnType<T>(), ort_source_format_);
|
||||
source_desc_ = ort_source_desc_;
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({src_dims}, DnnnType<T>(), ort_source_format_));
|
||||
} else {
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
ort_source_desc_ = parents_[0].get()->ort_source_desc_;
|
||||
|
@ -138,7 +138,7 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
int num_dimensions = static_cast<int>(x_shape.NumDimensions());
|
||||
if (num_dimensions == 3) {
|
||||
primitive_created_status_ = ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
|
||||
"1D BatchNormalization is not supported in MKLDNN.");
|
||||
"1D BatchNormalization is not supported in DNNL.");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -182,105 +182,105 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
return;
|
||||
}
|
||||
|
||||
mkldnn::memory::dims src_dims_mkl(
|
||||
dnnl::memory::dims src_dims_mkl(
|
||||
x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
mkldnn::memory::dims scale_dims_mkl(
|
||||
dnnl::memory::dims scale_dims_mkl(
|
||||
scale_shape.GetDims().begin(), scale_shape.GetDims().end());
|
||||
mkldnn::memory::dims b_dims_mkl(
|
||||
dnnl::memory::dims b_dims_mkl(
|
||||
b_shape.GetDims().begin(), b_shape.GetDims().end());
|
||||
mkldnn::memory::dims mean_dims_mkl(
|
||||
dnnl::memory::dims mean_dims_mkl(
|
||||
mean_shape.GetDims().begin(), mean_shape.GetDims().end());
|
||||
mkldnn::memory::dims var_dims_mkl(
|
||||
dnnl::memory::dims var_dims_mkl(
|
||||
var_shape.GetDims().begin(), var_shape.GetDims().end());
|
||||
|
||||
mkldnn::memory::dims dst_dims_mkl(
|
||||
dnnl::memory::dims dst_dims_mkl(
|
||||
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
|
||||
scale_shift_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({2, scale_dims_mkl[0]}, MklDnnType<T>(), mkldnn::memory::format_tag::nc));
|
||||
mean_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({mean_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
|
||||
var_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({var_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
scale_shift_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({2, scale_dims_mkl[0]}, DnnnType<T>(), dnnl::memory::format_tag::nc));
|
||||
mean_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({mean_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::x));
|
||||
var_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({var_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::x));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({dst_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
// scale_shift_mem will allocate 2*C*sizeof(float) buffer
|
||||
//
|
||||
scale_shift_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory({*scale_shift_md_, cpu_engine}));
|
||||
scale_shift_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory({*scale_shift_md_, cpu_engine}));
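// Sketch of how the 2*C float buffer allocated above is presumably packed:
// scale (gamma) in the first C floats, shift (beta) in the next C, matching
// DNNL's use_scale_shift layout. The helper below is illustrative only.
#include <cstring>
static void PackScaleShiftSketch(dnnl::memory& scale_shift_mem,
                                 const float* gamma, const float* beta, size_t channels) {
  float* dst = static_cast<float*>(scale_shift_mem.get_data_handle());
  std::memcpy(dst, gamma, channels * sizeof(float));             // row 0: scale
  std::memcpy(dst + channels, beta, channels * sizeof(float));   // row 1: shift
}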
|
||||
|
||||
mean_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(*mean_md_, cpu_engine, nullptr));
|
||||
var_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(*var_md_, cpu_engine, nullptr));
|
||||
mean_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(*mean_md_, cpu_engine, nullptr));
|
||||
var_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(*var_md_, cpu_engine, nullptr));
|
||||
|
||||
batchnorm_fwd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::desc>(
|
||||
mkldnn::batch_normalization_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, *src_md_, epsilon_,
|
||||
mkldnn::normalization_flags::use_scale_shift |
|
||||
mkldnn::normalization_flags::use_global_stats));
|
||||
batchnorm_fwd_ = onnxruntime::make_unique<dnnl::batch_normalization_forward::desc>(
|
||||
dnnl::batch_normalization_forward::desc(
|
||||
dnnl::prop_kind::forward_inference, *src_md_, epsilon_,
|
||||
dnnl::normalization_flags::use_scale_shift |
|
||||
dnnl::normalization_flags::use_global_stats));
|
||||
|
||||
if (fuse_relu_) {
|
||||
mkldnn::primitive_attr attr;
|
||||
// attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
|
||||
dnnl::primitive_attr attr;
|
||||
// attr.set_int_output_round_mode(dnnl::round_mode::round_nearest);
|
||||
// Execute RELU as Fuse PostOps
|
||||
const float ops_scale = 1.f;
|
||||
const float ops_alpha = 0.f; // relu negative slope
|
||||
const float ops_beta = 0.f;
|
||||
mkldnn::post_ops ops;
|
||||
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
dnnl::post_ops ops;
|
||||
ops.append_eltwise(ops_scale, dnnl::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
attr.set_post_ops(ops);
|
||||
|
||||
batchnorm_fwd_pd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::primitive_desc>(
|
||||
mkldnn::batch_normalization_forward::primitive_desc(*batchnorm_fwd_, attr, cpu_engine));
|
||||
batchnorm_fwd_pd_ = onnxruntime::make_unique<dnnl::batch_normalization_forward::primitive_desc>(
|
||||
dnnl::batch_normalization_forward::primitive_desc(*batchnorm_fwd_, attr, cpu_engine));
|
||||
} else {
|
||||
batchnorm_fwd_pd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::primitive_desc>(
|
||||
mkldnn::batch_normalization_forward::primitive_desc(
|
||||
batchnorm_fwd_pd_ = onnxruntime::make_unique<dnnl::batch_normalization_forward::primitive_desc>(
|
||||
dnnl::batch_normalization_forward::primitive_desc(
|
||||
*batchnorm_fwd_, cpu_engine));
|
||||
}
|
||||
|
||||
// out format of this kernel
|
||||
primitive_dst_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
primitive_dst_desc_ = static_cast<dnnl::memory::desc>(
|
||||
batchnorm_fwd_pd_.get()->dst_desc());
|
||||
primitive_src_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
primitive_src_desc_ = static_cast<dnnl::memory::desc>(
|
||||
batchnorm_fwd_pd_.get()->dst_desc());
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// Use mkldnn's internal output buffer
|
||||
// Use Dnnl's internal output buffer
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
}
|
||||
auto bn = mkldnn::batch_normalization_forward(
|
||||
auto bn = dnnl::batch_normalization_forward(
|
||||
*batchnorm_fwd_pd_);
|
||||
net.push_back(bn);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_MEAN, *mean_mem_},
|
||||
{MKLDNN_ARG_VARIANCE, *var_mem_},
|
||||
{MKLDNN_ARG_SCALE_SHIFT, *scale_shift_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{DNNL_ARG_SRC, *src_mem_},
|
||||
{DNNL_ARG_MEAN, *mean_mem_},
|
||||
{DNNL_ARG_VARIANCE, *var_mem_},
|
||||
{DNNL_ARG_SCALE_SHIFT, *scale_shift_mem_},
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}});
|
||||
|
||||
// Allocate dst buffer if reorder is necessary
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
// reorder is necessary
|
||||
mkldnn::memory::data_type t = MklDnnType<T>();
|
||||
dnnl::memory::data_type t = DnnnType<T>();
|
||||
InitDstReorderOutput(cpu_engine, t, net, net_args);
|
||||
}
|
||||
}
|
||||
|
@ -289,7 +289,7 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
// abort as MKLDNN cannot execute this. but
|
||||
// abort as DNNL cannot execute this, but
// ORT tries to delete the output_tensor buffer data; allocate memory so that it can delete it
|
||||
// fix for test_averagepool_1d_default node test
|
||||
ORT_RETURN_IF_ERROR(primitive_created_status_);
|
||||
|
@ -316,7 +316,7 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
auto sdim = tensor_shape.size();
|
||||
|
||||
TensorShape scale_shape(sshape, sdim);
|
||||
mkldnn::memory::dims scale_dims_mkl(
|
||||
dnnl::memory::dims scale_dims_mkl(
|
||||
scale_shape.GetDims().begin(), scale_shape.GetDims().end());
|
||||
|
||||
mean_mem_->set_data_handle(static_cast<void*>(const_cast<T*>(mean_data)));
|
||||
|
@ -346,23 +346,23 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<mkldnn::memory> src_mem_;
|
||||
std::unique_ptr<mkldnn::memory> scale_shift_mem_;
|
||||
std::unique_ptr<mkldnn::memory> mean_mem_;
|
||||
std::unique_ptr<mkldnn::memory> var_mem_;
|
||||
std::unique_ptr<mkldnn::memory> dst_mem_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_;
|
||||
std::unique_ptr<dnnl::memory> scale_shift_mem_;
|
||||
std::unique_ptr<dnnl::memory> mean_mem_;
|
||||
std::unique_ptr<dnnl::memory> var_mem_;
|
||||
std::unique_ptr<dnnl::memory> dst_mem_;
|
||||
|
||||
std::unique_ptr<mkldnn::memory::desc> src_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> scale_shift_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> mean_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> var_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> dst_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> scale_shift_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> mean_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> var_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> dst_md_;
|
||||
|
||||
std::unique_ptr<mkldnn::batch_normalization_forward::desc> batchnorm_fwd_;
|
||||
std::unique_ptr<mkldnn::batch_normalization_forward::primitive_desc> batchnorm_fwd_pd_;
|
||||
std::unique_ptr<dnnl::batch_normalization_forward::desc> batchnorm_fwd_;
|
||||
std::unique_ptr<dnnl::batch_normalization_forward::primitive_desc> batchnorm_fwd_pd_;
|
||||
|
||||
protected:
|
||||
float epsilon_ = 1e-5f;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -2,16 +2,16 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
#include "mkldnn_types.h"
|
||||
#include "dnnl_types.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
#include "core/providers/cpu/nn/autopad_type.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_kernel.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_kernel.h"
|
||||
#include "core/util/math.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
// helper function
|
||||
template <bool ForceSymmetricAutoPadding>
|
||||
|
@ -61,22 +61,22 @@ Status ComputePadAndOutputShape(
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
class MklDnnConv : public MklDnnKernel {
|
||||
class DnnlConv : public DnnlKernel {
|
||||
public:
|
||||
MklDnnConv(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider,
|
||||
DnnlConv(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider,
|
||||
const NodeAttributes& attributes,
|
||||
const std::string attributes_prefix = "") : MklDnnKernel(node, provider) {
|
||||
const std::string attributes_prefix = "") : DnnlKernel(node, provider) {
|
||||
ReadAttributes(attributes, attributes_prefix);
|
||||
}
|
||||
|
||||
void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
dnnl::engine& cpu_engine,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
stream_ = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine));
|
||||
stream_ = onnxruntime::make_unique<dnnl::stream>(dnnl::stream(cpu_engine));
|
||||
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1);
|
||||
|
@ -98,17 +98,17 @@ class MklDnnConv : public MklDnnKernel {
|
|||
ort.ReleaseTensorTypeAndShapeInfo(tensor_info);
|
||||
auto xshape = tensor_shape.data();
|
||||
auto xdim = tensor_shape.size();
|
||||
ort_source_format_ = mkldnn::memory::format_tag::any;
|
||||
ort_source_format_ = dnnl::memory::format_tag::any;
|
||||
x_shape = TensorShape(xshape, xdim);
|
||||
} else {
|
||||
// get the output of previous node (mkldnn block propagation).
|
||||
// get the output of previous node (Dnnl block propagation).
|
||||
// TODO Sourcenode will set src of this node.
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
ort_source_desc_ = parents_[0].get()->ort_source_desc_;
|
||||
source_desc_ = parents_[0].get()->primitive_dst_desc_;
|
||||
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
dnnl::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
}
|
||||
|
||||
primitive_created_status_ = ValidateInputShape(x_shape, w_shape);
|
||||
|
@ -166,11 +166,11 @@ class MklDnnConv : public MklDnnKernel {
|
|||
TensorShape y_shape(y_dims);
|
||||
primitive_dst_shape_ = TensorShape(y_dims);
|
||||
TensorShape output_shape = y_shape.Slice(2);
|
||||
mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
dnnl::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
primitive_dst_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({dst_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims filter_dims_mkl;
|
||||
dnnl::memory::dims filter_dims_mkl;
|
||||
if (group_mkl == 1) {
|
||||
filter_dims_mkl.assign(w_shape.GetDims().begin(), w_shape.GetDims().end());
|
||||
} else {
|
||||
|
@ -178,16 +178,16 @@ class MklDnnConv : public MklDnnKernel {
|
|||
static_cast<int>(w_shape[0] / group_mkl)});
|
||||
filter_dims_mkl.insert(filter_dims_mkl.end(), w_shape.GetDims().begin() + 1, w_shape.GetDims().end());
|
||||
}
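// Worked example of the grouped-filter reshape above: ONNX weights of shape
// {O, I/g, kH, kW} become DNNL dims {g, O/g, I/g, kH, kW}, e.g.
// {64, 16, 3, 3} with group = 4 -> {4, 16, 16, 3, 3}. Illustrative helper only.
#include <cstdint>
#include <vector>
static std::vector<int64_t> GroupedFilterDimsSketch(const std::vector<int64_t>& w_dims, int64_t group) {
  std::vector<int64_t> dims{group, w_dims[0] / group};
  dims.insert(dims.end(), w_dims.begin() + 1, w_dims.end());
  return dims;
}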
|
||||
mkldnn::memory::dims strides_mkl(strides.begin(), strides.end());
|
||||
mkldnn::memory::dims dilations_mkl(dilations.begin(), dilations.end());
|
||||
// mkldnn dilations start from 0 so we need to subtract 1 from each dim.
|
||||
dnnl::memory::dims strides_mkl(strides.begin(), strides.end());
|
||||
dnnl::memory::dims dilations_mkl(dilations.begin(), dilations.end());
|
||||
// Dnnl dilations start from 0 so we need to subtract 1 from each dim.
|
||||
for (size_t dim = 0; dim < kernel_rank; dim++) {
|
||||
dilations_mkl[dim] -= 1;
|
||||
}
|
||||
|
||||
mkldnn::memory::dims padding_left_mkl(pads.begin(), pads.begin() + kernel_rank);
|
||||
mkldnn::memory::dims padding_right_mkl(pads.begin() + kernel_rank, pads.end());
|
||||
mkldnn::memory::dims bias_dims_mkl;
|
||||
dnnl::memory::dims padding_left_mkl(pads.begin(), pads.begin() + kernel_rank);
|
||||
dnnl::memory::dims padding_right_mkl(pads.begin() + kernel_rank, pads.end());
|
||||
dnnl::memory::dims bias_dims_mkl;
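// Worked example of the two conversions above, with illustrative values:
// ONNX dilations {2, 2} become DNNL dilations {1, 1} (subtract 1 per spatial dim),
// and ONNX pads {1, 1, 2, 2} with kernel_rank 2 split into
// padding_left {1, 1} and padding_right {2, 2}.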
|
||||
if (mklnode_ptr_->num_inputs == 3) {
|
||||
const OrtValue* binput_tensor = ort.KernelContext_GetInput(context, input_index + 2);
|
||||
auto btensor_info = ort.GetTensorTypeAndShape(binput_tensor);
|
||||
|
@ -199,165 +199,165 @@ class MklDnnConv : public MklDnnKernel {
|
|||
bias_dims_mkl.assign(b_shape.GetDims().begin(), b_shape.GetDims().end());
|
||||
}
|
||||
|
||||
auto src_format = mkldnn::memory::format_tag::any;
|
||||
auto src_format = dnnl::memory::format_tag::any;
|
||||
if (kernel_rank == 1) {
|
||||
src_format = mkldnn::memory::format_tag::ncw;
|
||||
src_format = dnnl::memory::format_tag::ncw;
|
||||
if (group_mkl == 1) {
|
||||
filter_format_ = mkldnn::memory::format_tag::oiw;
|
||||
filter_format_ = dnnl::memory::format_tag::oiw;
|
||||
} else {
|
||||
filter_format_ = mkldnn::memory::format_tag::goiw;
|
||||
filter_format_ = dnnl::memory::format_tag::goiw;
|
||||
}
|
||||
} else if (kernel_rank == 2) {
|
||||
src_format = mkldnn::memory::format_tag::nchw;
|
||||
src_format = dnnl::memory::format_tag::nchw;
|
||||
if (group_mkl == 1) {
|
||||
filter_format_ = mkldnn::memory::format_tag::oihw;
|
||||
filter_format_ = dnnl::memory::format_tag::oihw;
|
||||
} else {
|
||||
filter_format_ = mkldnn::memory::format_tag::goihw;
|
||||
filter_format_ = dnnl::memory::format_tag::goihw;
|
||||
}
|
||||
} else {
|
||||
src_format = mkldnn::memory::format_tag::ncdhw;
|
||||
src_format = dnnl::memory::format_tag::ncdhw;
|
||||
if (group_mkl == 1) {
|
||||
filter_format_ = mkldnn::memory::format_tag::oidhw;
|
||||
filter_format_ = dnnl::memory::format_tag::oidhw;
|
||||
} else {
|
||||
filter_format_ = mkldnn::memory::format_tag::goidhw;
|
||||
filter_format_ = dnnl::memory::format_tag::goidhw;
|
||||
}
|
||||
}
|
||||
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
dnnl::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
ort_source_format_ = src_format;
|
||||
ort_source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
|
||||
source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
|
||||
ort_source_desc_ = dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), src_format);
|
||||
source_desc_ = dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), src_format);
|
||||
}
|
||||
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
// Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be
|
||||
// Set the memory descriptors to format::any to allow DNNL to decide what the optimal memory layout should be
|
||||
// for the computation given the input
|
||||
filter_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
filter_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({filter_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
if (!bias_dims_mkl.empty())
|
||||
bias_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
bias_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({bias_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims conv_zero_padding = {0, 0};
|
||||
dnnl::memory::dims conv_zero_padding = {0, 0};
|
||||
|
||||
if (!bias_dims_mkl.empty()) {
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
|
||||
mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
fwd_desc_ = onnxruntime::make_unique<dnnl::convolution_forward::desc>(
|
||||
dnnl::convolution_forward::desc(
|
||||
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *bias_md_, *primitive_dst_md_,
|
||||
strides_mkl, dilations_mkl, padding_left_mkl,
|
||||
padding_right_mkl));
|
||||
} else {
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
|
||||
mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
fwd_desc_ = onnxruntime::make_unique<dnnl::convolution_forward::desc>(
|
||||
dnnl::convolution_forward::desc(
|
||||
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *primitive_dst_md_, strides_mkl,
|
||||
dilations_mkl, padding_left_mkl, padding_right_mkl));
|
||||
}
|
||||
|
||||
if (fuse_relu_) {
|
||||
mkldnn::primitive_attr attr;
|
||||
// attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
|
||||
dnnl::primitive_attr attr;
|
||||
// attr.set_int_output_round_mode(dnnl::round_mode::round_nearest);
|
||||
// Execute RELU as Fuse PostOps
|
||||
const float ops_scale = 1.f;
|
||||
const float ops_alpha = 0.f; // relu negative slope
|
||||
const float ops_beta = 0.f;
|
||||
mkldnn::post_ops ops;
|
||||
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
dnnl::post_ops ops;
|
||||
ops.append_eltwise(ops_scale, dnnl::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
attr.set_post_ops(ops);
|
||||
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<dnnl::convolution_forward::primitive_desc>(
|
||||
dnnl::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
|
||||
} else {
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<dnnl::convolution_forward::primitive_desc>(
|
||||
dnnl::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
}
|
||||
|
||||
primitive_src_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
primitive_src_desc_ = static_cast<dnnl::memory::desc>(
|
||||
conv_fwd_pd_.get()->src_desc());
|
||||
|
||||
filter_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
filter_desc_ = static_cast<dnnl::memory::desc>(
|
||||
conv_fwd_pd_.get()->weights_desc());
|
||||
|
||||
primitive_dst_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
primitive_dst_desc_ = static_cast<dnnl::memory::desc>(
|
||||
conv_fwd_pd_.get()->dst_desc());
|
||||
|
||||
src_size_ = conv_fwd_pd_.get()->src_desc().get_size();
|
||||
filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size();
|
||||
dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size();
|
||||
|
||||
filter_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
filter_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
|
||||
if (primitive_src_desc_ != source_desc_) {
|
||||
mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType<T>(), ort_source_format_});
|
||||
dnnl::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
auto pd = dnnl::memory::desc({{src_dims}, DnnnType<T>(), ort_source_format_});
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty())
|
||||
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
src_mem_from_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(pd, cpu_engine, nullptr));
|
||||
else
|
||||
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
|
||||
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
|
||||
{MKLDNN_ARG_TO, *src_mem_}});
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(dnnl::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{DNNL_ARG_FROM, *src_mem_from_},
|
||||
{DNNL_ARG_TO, *src_mem_}});
|
||||
} else {
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
}
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// Use mkldnn's internal output buffer
|
||||
// Use Dnnl's internal output buffer
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
if (!bias_dims_mkl.empty()) {
|
||||
bias_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
|
||||
mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
bias_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_ = onnxruntime::make_unique<dnnl::convolution_forward>(
|
||||
dnnl::convolution_forward(*conv_fwd_pd_));
|
||||
net.push_back(*conv_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
|
||||
{MKLDNN_ARG_BIAS, *bias_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{DNNL_ARG_SRC, *src_mem_},
|
||||
{DNNL_ARG_WEIGHTS, *filter_mem_},
|
||||
{DNNL_ARG_BIAS, *bias_mem_},
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}});
|
||||
} else {
|
||||
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
|
||||
mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
conv_fwd_ = onnxruntime::make_unique<dnnl::convolution_forward>(
|
||||
dnnl::convolution_forward(*conv_fwd_pd_));
|
||||
net.push_back(*conv_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{DNNL_ARG_SRC, *src_mem_},
|
||||
{DNNL_ARG_WEIGHTS, *filter_mem_},
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}});
|
||||
}
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
// reorder is necessary
|
||||
mkldnn::memory::data_type t = MklDnnType<T>();
|
||||
dnnl::memory::data_type t = DnnnType<T>();
|
||||
InitDstReorderOutput(cpu_engine, t, net, net_args);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void ReorderWeights(const OrtCustomOpApi* api, OrtKernelContext* context, mkldnn::engine& cpu_engine) override {
|
||||
virtual void ReorderWeights(const OrtCustomOpApi* api, OrtKernelContext* context, dnnl::engine& cpu_engine) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
|
@@ -373,7 +373,7 @@ class MklDnnConv : public MklDnnKernel {
|
|||
|
||||
const int group_mkl = static_cast<int>(group_);
|
||||
|
||||
mkldnn::memory::dims filter_dims_mkl;
|
||||
dnnl::memory::dims filter_dims_mkl;
|
||||
if (group_mkl == 1) {
|
||||
filter_dims_mkl.assign(W.GetDims().begin(), W.GetDims().end());
|
||||
} else {
|
||||
|
@@ -385,16 +385,16 @@ class MklDnnConv : public MklDnnKernel {
|
|||
{
|
||||
// lock to make sure reordering is done only once
|
||||
std::lock_guard<OrtMutex> lock(provider_->GetMutex());
|
||||
std::shared_ptr<mkldnn::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
std::shared_ptr<dnnl::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
|
||||
if (filter_dst_mem == nullptr) {
|
||||
mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType<T>(), filter_format_}, cpu_engine, (void*)filter_data);
|
||||
dnnl::memory src = dnnl::memory({{filter_dims_mkl}, DnnnType<T>(), filter_format_}, cpu_engine, (void*)filter_data);
|
||||
IAllocatorUniquePtr<void> filter_reorder_buffer =
|
||||
IAllocator::MakeUniquePtr<void>(alloc_, filter_size_);
|
||||
filter_dst_mem = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
filter_dst_mem = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
|
||||
mkldnn::reorder(src, *filter_dst_mem)
|
||||
dnnl::reorder(src, *filter_dst_mem)
|
||||
.execute(cpu_engine, src, *filter_dst_mem);
|
||||
|
||||
provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer));
|
||||
|
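The ReorderWeights override above follows the usual DNNL pattern: describe the weights in the caller's plain layout, run a one-off dnnl::reorder into the layout the convolution primitive_desc selected, and cache the result. The standalone sketch below shows that step in isolation, assuming DNNL 1.x; the helper name, engine, stream and buffer handling are illustrative and not the provider's actual plumbing.

// Sketch: reorder plain OIHW conv weights into the layout the primitive prefers.
#include <vector>
#include "dnnl.hpp"

dnnl::memory ReorderConvWeights(const dnnl::engine& eng, dnnl::stream& strm,
                                const dnnl::convolution_forward::primitive_desc& conv_pd,
                                const dnnl::memory::dims& weight_dims, float* weight_data) {
  // Weights as the caller provides them (plain OIHW, f32).
  dnnl::memory::desc user_md(weight_dims, dnnl::memory::data_type::f32,
                             dnnl::memory::format_tag::oihw);
  dnnl::memory user_mem(user_md, eng, weight_data);

  // Nothing to do if the primitive already wants the plain layout.
  if (conv_pd.weights_desc() == user_md)
    return user_mem;

  // Otherwise allocate memory in the primitive's layout and run one reorder.
  dnnl::memory prim_mem(conv_pd.weights_desc(), eng);
  dnnl::reorder(user_mem, prim_mem).execute(strm, user_mem, prim_mem);
  strm.wait();
  return prim_mem;
}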
@@ -418,7 +418,7 @@ class MklDnnConv : public MklDnnKernel {
|
|||
const OrtValue* binput_tensor = ort.KernelContext_GetInput(context, input_index + 2);
|
||||
bias_data = const_cast<T*>(ort.GetTensorData<T>(binput_tensor));
|
||||
}
|
||||
std::shared_ptr<mkldnn::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
std::shared_ptr<dnnl::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
if (filter_dst_mem == nullptr) {
|
||||
ReorderWeights(api, context, GetEngine());
|
||||
filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
|
@@ -527,28 +527,28 @@ class MklDnnConv : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
mkldnn::memory::desc filter_desc_;
|
||||
mkldnn::memory::format_tag filter_format_;
|
||||
dnnl::memory::desc filter_desc_;
|
||||
dnnl::memory::format_tag filter_format_;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> src_mem_from_;
|
||||
std::unique_ptr<mkldnn::memory> src_mem_to_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_from_;
|
||||
std::unique_ptr<dnnl::memory> src_mem_to_;
|
||||
|
||||
size_t src_size_;
|
||||
size_t filter_size_;
|
||||
size_t dst_size_;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> src_mem_;
|
||||
std::unique_ptr<mkldnn::memory> filter_mem_;
|
||||
std::unique_ptr<mkldnn::memory> bias_mem_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_;
|
||||
std::unique_ptr<dnnl::memory> filter_mem_;
|
||||
std::unique_ptr<dnnl::memory> bias_mem_;
|
||||
|
||||
std::unique_ptr<mkldnn::convolution_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<dnnl::convolution_forward::desc> fwd_desc_;
|
||||
|
||||
std::unique_ptr<mkldnn::memory::desc> src_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> filter_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> bias_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> filter_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> bias_md_;
|
||||
|
||||
std::unique_ptr<mkldnn::convolution_forward::primitive_desc> conv_fwd_pd_;
|
||||
std::unique_ptr<mkldnn::primitive> conv_fwd_;
|
||||
std::unique_ptr<dnnl::convolution_forward::primitive_desc> conv_fwd_pd_;
|
||||
std::unique_ptr<dnnl::primitive> conv_fwd_;
|
||||
|
||||
private:
|
||||
IAllocatorUniquePtr<void> src_reorder_buffer_;
|
||||
|
@@ -636,7 +636,7 @@ class MklDnnConv : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<mkldnn::stream> stream_;
|
||||
std::unique_ptr<dnnl::stream> stream_;
|
||||
std::vector<int64_t> kernel_shape_; // must use ComputeKernelShape(...), instead of kernel_shape_
|
||||
AutoPadType auto_pad_;
|
||||
int64_t group_;
|
||||
|
@@ -647,5 +647,5 @@ class MklDnnConv : public MklDnnKernel {
|
|||
std::string activation_;
|
||||
float alpha_;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
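The fuse_relu_ branch in CreatePrimitives above relies on DNNL post-ops: ReLU is attached to the convolution through a primitive_attr instead of being run as a separate primitive, and format_tag::any lets the library choose blocked layouts. Below is a minimal standalone sketch of that construction, assuming DNNL 1.x; the shapes, strides and helper name are made up for illustration.

// Sketch: build a convolution primitive descriptor with a fused ReLU post-op.
#include "dnnl.hpp"

dnnl::convolution_forward::primitive_desc MakeFusedConvReluPd(const dnnl::engine& eng) {
  using tag = dnnl::memory::format_tag;
  using dt = dnnl::memory::data_type;

  // format_tag::any lets DNNL pick the optimal blocked layouts.
  dnnl::memory::desc src_md({1, 3, 224, 224}, dt::f32, tag::any);
  dnnl::memory::desc wei_md({64, 3, 7, 7}, dt::f32, tag::any);
  dnnl::memory::desc dst_md({1, 64, 112, 112}, dt::f32, tag::any);

  dnnl::convolution_forward::desc desc(
      dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct,
      src_md, wei_md, dst_md,
      /*strides*/ {2, 2}, /*dilations (0 = none)*/ {0, 0},
      /*padding_l*/ {3, 3}, /*padding_r*/ {3, 3});

  // ReLU is appended as an eltwise post-op rather than a separate primitive.
  dnnl::post_ops ops;
  ops.append_eltwise(/*scale*/ 1.0f, dnnl::algorithm::eltwise_relu,
                     /*alpha, i.e. negative slope*/ 0.0f, /*beta*/ 0.0f);
  dnnl::primitive_attr attr;
  attr.set_post_ops(ops);

  return dnnl::convolution_forward::primitive_desc(desc, attr, eng);
}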
@@ -2,34 +2,34 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
#include "mkldnn_types.h"
|
||||
#include "dnnl_types.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
#include "core/providers/cpu/nn/autopad_type.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_kernel.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_kernel.h"
|
||||
#include "core/util/math.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
template <typename T>
|
||||
class MklDnnConvBatchNorm : public MklDnnKernel {
|
||||
class DnnlConvBatchNorm : public DnnlKernel {
|
||||
public:
|
||||
MklDnnConvBatchNorm(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider,
|
||||
DnnlConvBatchNorm(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider,
|
||||
const NodeAttributes& attributes,
|
||||
const std::string attributes_prefix = "") : MklDnnKernel(node, provider) {
|
||||
const std::string attributes_prefix = "") : DnnlKernel(node, provider) {
|
||||
ReadAttributes(attributes, attributes_prefix);
|
||||
}
|
||||
|
||||
void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
dnnl::engine& cpu_engine,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
stream_ = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine));
|
||||
stream_ = onnxruntime::make_unique<dnnl::stream>(dnnl::stream(cpu_engine));
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1);
|
||||
auto wtensor_info = ort.GetTensorTypeAndShape(winput_tensor);
|
||||
|
@@ -50,10 +50,10 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
ort.ReleaseTensorTypeAndShapeInfo(tensor_info);
|
||||
auto xshape = tensor_shape.data();
|
||||
auto xdim = tensor_shape.size();
|
||||
ort_source_format_ = mkldnn::memory::format_tag::any;
|
||||
ort_source_format_ = dnnl::memory::format_tag::any;
|
||||
x_shape = TensorShape(xshape, xdim);
|
||||
} else {
|
||||
// get the output of previous node (mkldnn block propagation).
|
||||
// get the output of previous node (Dnnl block propagation).
|
||||
// TODO Sourcenode will set src of this node.
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
|
@@ -116,11 +116,11 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
TensorShape y_shape(y_dims);
|
||||
primitive_dst_shape_ = TensorShape(y_dims);
|
||||
TensorShape output_shape = y_shape.Slice(2);
|
||||
mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
dnnl::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
primitive_dst_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({dst_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims filter_dims_mkl;
|
||||
dnnl::memory::dims filter_dims_mkl;
|
||||
if (group_mkl == 1) {
|
||||
filter_dims_mkl.assign(w_shape.GetDims().begin(), w_shape.GetDims().end());
|
||||
} else {
|
||||
|
@@ -128,16 +128,16 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
static_cast<int>(w_shape[0] / group_mkl)});
|
||||
filter_dims_mkl.insert(filter_dims_mkl.end(), w_shape.GetDims().begin() + 1, w_shape.GetDims().end());
|
||||
}
|
||||
mkldnn::memory::dims strides_mkl(strides.begin(), strides.end());
|
||||
mkldnn::memory::dims dilations_mkl(dilations.begin(), dilations.end());
|
||||
// mkldnn dilations start from 0 so we need to subtract 1 from each dim.
|
||||
dnnl::memory::dims strides_mkl(strides.begin(), strides.end());
|
||||
dnnl::memory::dims dilations_mkl(dilations.begin(), dilations.end());
|
||||
// Dnnl dilations start from 0 so we need to subtract 1 from each dim.
|
||||
for (size_t dim = 0; dim < kernel_rank; dim++) {
|
||||
dilations_mkl[dim] -= 1;
|
||||
}
|
||||
|
||||
mkldnn::memory::dims padding_left_mkl(pads.begin(), pads.begin() + kernel_rank);
|
||||
mkldnn::memory::dims padding_right_mkl(pads.begin() + kernel_rank, pads.end());
|
||||
mkldnn::memory::dims bias_dims_mkl;
|
||||
dnnl::memory::dims padding_left_mkl(pads.begin(), pads.begin() + kernel_rank);
|
||||
dnnl::memory::dims padding_right_mkl(pads.begin() + kernel_rank, pads.end());
|
||||
dnnl::memory::dims bias_dims_mkl;
|
||||
|
||||
int batchNormIndix = (mklnode_ptr_->num_inputs == 7) ? input_index + 3 : input_index + 2;
|
||||
if (mklnode_ptr_->num_inputs == 7) {
|
||||
|
@@ -160,143 +160,143 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
bias_dims_mkl.assign(b_shape.GetDims().begin(), b_shape.GetDims().end());
|
||||
}
|
||||
|
||||
auto src_format = mkldnn::memory::format_tag::any;
|
||||
auto src_format = dnnl::memory::format_tag::any;
|
||||
if (kernel_rank == 1) {
|
||||
src_format = mkldnn::memory::format_tag::ncw;
|
||||
src_format = dnnl::memory::format_tag::ncw;
|
||||
if (group_mkl == 1) {
|
||||
filter_format_ = mkldnn::memory::format_tag::oiw;
|
||||
filter_format_ = dnnl::memory::format_tag::oiw;
|
||||
} else {
|
||||
filter_format_ = mkldnn::memory::format_tag::goiw;
|
||||
filter_format_ = dnnl::memory::format_tag::goiw;
|
||||
}
|
||||
} else if (kernel_rank == 2) {
|
||||
src_format = mkldnn::memory::format_tag::nchw;
|
||||
src_format = dnnl::memory::format_tag::nchw;
|
||||
if (group_mkl == 1) {
|
||||
filter_format_ = mkldnn::memory::format_tag::oihw;
|
||||
filter_format_ = dnnl::memory::format_tag::oihw;
|
||||
} else {
|
||||
filter_format_ = mkldnn::memory::format_tag::goihw;
|
||||
filter_format_ = dnnl::memory::format_tag::goihw;
|
||||
}
|
||||
} else {
|
||||
src_format = mkldnn::memory::format_tag::ncdhw;
|
||||
src_format = dnnl::memory::format_tag::ncdhw;
|
||||
if (group_mkl == 1) {
|
||||
filter_format_ = mkldnn::memory::format_tag::oidhw;
|
||||
filter_format_ = dnnl::memory::format_tag::oidhw;
|
||||
} else {
|
||||
filter_format_ = mkldnn::memory::format_tag::goidhw;
|
||||
filter_format_ = dnnl::memory::format_tag::goidhw;
|
||||
}
|
||||
}
|
||||
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
dnnl::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
ort_source_format_ = src_format;
|
||||
ort_source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
|
||||
source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
|
||||
ort_source_desc_ = dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), src_format);
|
||||
source_desc_ = dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), src_format);
|
||||
}
|
||||
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
// Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be
|
||||
// Set the memory descriptors to format::any to allow DNNL to decide what the optimal memory layout should be
|
||||
// for the computation given the input
|
||||
filter_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
bias_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
filter_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({filter_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
bias_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({bias_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims conv_zero_padding = {0, 0};
|
||||
dnnl::memory::dims conv_zero_padding = {0, 0};
|
||||
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
|
||||
mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
fwd_desc_ = onnxruntime::make_unique<dnnl::convolution_forward::desc>(
|
||||
dnnl::convolution_forward::desc(
|
||||
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *bias_md_, *primitive_dst_md_,
|
||||
strides_mkl, dilations_mkl, padding_left_mkl,
|
||||
padding_right_mkl));
|
||||
|
||||
if (fuse_relu_) {
|
||||
mkldnn::primitive_attr attr;
|
||||
// attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
|
||||
dnnl::primitive_attr attr;
|
||||
// attr.set_int_output_round_mode(dnnl::round_mode::round_nearest);
|
||||
// Execute RELU as Fuse PostOps
|
||||
const float ops_scale = 1.f;
|
||||
const float ops_alpha = 0.f; // relu negative slope
|
||||
const float ops_beta = 0.f;
|
||||
mkldnn::post_ops ops;
|
||||
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
dnnl::post_ops ops;
|
||||
ops.append_eltwise(ops_scale, dnnl::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
attr.set_post_ops(ops);
|
||||
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<dnnl::convolution_forward::primitive_desc>(
|
||||
dnnl::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
|
||||
} else {
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<dnnl::convolution_forward::primitive_desc>(
|
||||
dnnl::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
}
|
||||
|
||||
primitive_src_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
primitive_src_desc_ = static_cast<dnnl::memory::desc>(
|
||||
conv_fwd_pd_.get()->src_desc());
|
||||
|
||||
filter_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
filter_desc_ = static_cast<dnnl::memory::desc>(
|
||||
conv_fwd_pd_.get()->weights_desc());
|
||||
|
||||
primitive_dst_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
primitive_dst_desc_ = static_cast<dnnl::memory::desc>(
|
||||
conv_fwd_pd_.get()->dst_desc());
|
||||
|
||||
src_size_ = conv_fwd_pd_.get()->src_desc().get_size();
|
||||
filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size();
|
||||
dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size();
|
||||
|
||||
filter_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
filter_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
|
||||
if (primitive_src_desc_ != source_desc_) {
|
||||
mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType<T>(), ort_source_format_});
|
||||
dnnl::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
auto pd = dnnl::memory::desc({{src_dims}, DnnnType<T>(), ort_source_format_});
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty())
|
||||
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
src_mem_from_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(pd, cpu_engine, nullptr));
|
||||
else
|
||||
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
|
||||
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
|
||||
{MKLDNN_ARG_TO, *src_mem_}});
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(dnnl::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{DNNL_ARG_FROM, *src_mem_from_},
|
||||
{DNNL_ARG_TO, *src_mem_}});
|
||||
} else {
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
}
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// Use mkldnn's internal output buffer
|
||||
// Use Dnnl's internal output buffer
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
bias_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
|
||||
mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
bias_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_ = onnxruntime::make_unique<dnnl::convolution_forward>(
|
||||
dnnl::convolution_forward(*conv_fwd_pd_));
|
||||
net.push_back(*conv_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
|
||||
{MKLDNN_ARG_BIAS, *bias_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{DNNL_ARG_SRC, *src_mem_},
|
||||
{DNNL_ARG_WEIGHTS, *filter_mem_},
|
||||
{DNNL_ARG_BIAS, *bias_mem_},
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}});
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
// reorder is necessary
|
||||
mkldnn::memory::data_type t = MklDnnType<T>();
|
||||
dnnl::memory::data_type t = DnnnType<T>();
|
||||
InitDstReorderOutput(cpu_engine, t, net, net_args);
|
||||
}
|
||||
}
|
||||
|
@@ -357,7 +357,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
}
|
||||
}
|
||||
|
||||
virtual void ReorderWeights(const OrtCustomOpApi* api, OrtKernelContext* context, mkldnn::engine& cpu_engine) override {
|
||||
virtual void ReorderWeights(const OrtCustomOpApi* api, OrtKernelContext* context, dnnl::engine& cpu_engine) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
const OrtValue* input_tensor = ort.KernelContext_GetInput(context, input_index + 1);
|
||||
|
@@ -369,7 +369,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
TensorShape W(xshape, xdim);
|
||||
|
||||
const int group_mkl = static_cast<int>(group_);
|
||||
mkldnn::memory::dims filter_dims_mkl;
|
||||
dnnl::memory::dims filter_dims_mkl;
|
||||
if (group_mkl == 1) {
|
||||
filter_dims_mkl.assign(W.GetDims().begin(), W.GetDims().end());
|
||||
} else {
|
||||
|
@@ -393,29 +393,29 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
{
|
||||
// lock to make sure reordering is done only once
|
||||
std::lock_guard<OrtMutex> lock(provider_->GetMutex());
|
||||
std::shared_ptr<mkldnn::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
std::shared_ptr<dnnl::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
|
||||
if (filter_dst_mem == nullptr) {
|
||||
mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType<T>(), filter_format_}, cpu_engine, (void*)weights_scaled_by_axis.data());
|
||||
dnnl::memory src = dnnl::memory({{filter_dims_mkl}, DnnnType<T>(), filter_format_}, cpu_engine, (void*)weights_scaled_by_axis.data());
|
||||
IAllocatorUniquePtr<void> filter_reorder_buffer =
|
||||
IAllocator::MakeUniquePtr<void>(alloc_, filter_size_);
|
||||
filter_dst_mem = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
filter_dst_mem = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
|
||||
mkldnn::reorder(src, *filter_dst_mem)
|
||||
dnnl::reorder(src, *filter_dst_mem)
|
||||
.execute(cpu_engine, src, *filter_dst_mem);
|
||||
|
||||
provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer));
|
||||
provider_->SetWeightsMemoryBuffer(mklnode_ptr_->weight_name, filter_dst_mem);
|
||||
}
|
||||
|
||||
std::shared_ptr<mkldnn::memory> bias_mem = provider_->GetBiasMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
std::shared_ptr<dnnl::memory> bias_mem = provider_->GetBiasMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
if (bias_mem == nullptr) {
|
||||
auto bias_size = conv_fwd_pd_.get()->bias_desc().get_size();
|
||||
IAllocatorUniquePtr<void> bias_buffer =
|
||||
IAllocator::MakeUniquePtr<void>(alloc_, bias_size);
|
||||
bias_mem = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get()));
|
||||
bias_mem = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get()));
|
||||
float* bias_buffer_data = static_cast<float*>(bias_buffer.get());
|
||||
if (mklnode_ptr_->num_inputs == 7) {
|
||||
const OrtValue* conv_bias_tensor = ort.KernelContext_GetInput(context, input_index + 2);
|
||||
|
@@ -460,7 +460,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1);
|
||||
const T* filter_data = const_cast<T*>(ort.GetTensorData<T>(winput_tensor));
|
||||
|
||||
std::shared_ptr<mkldnn::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
std::shared_ptr<dnnl::memory> filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
if (filter_dst_mem == nullptr) {
|
||||
ReorderWeights(api, context, GetEngine());
|
||||
filter_dst_mem = provider_->GetWeightsMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
|
@@ -468,7 +468,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
filter_data = static_cast<T*>(filter_dst_mem->get_data_handle());
|
||||
filter_mem_->set_data_handle(static_cast<void*>(const_cast<T*>(filter_data)));
|
||||
|
||||
std::shared_ptr<mkldnn::memory> bias_mem = provider_->GetBiasMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
std::shared_ptr<dnnl::memory> bias_mem = provider_->GetBiasMemoryBuffer(mklnode_ptr_->weight_name);
|
||||
const T* bias_data = static_cast<T*>(bias_mem->get_data_handle());
|
||||
bias_mem_->set_data_handle(static_cast<void*>(const_cast<T*>(bias_data)));
|
||||
|
||||
|
@@ -574,28 +574,28 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
mkldnn::memory::desc filter_desc_;
|
||||
mkldnn::memory::format_tag filter_format_;
|
||||
dnnl::memory::desc filter_desc_;
|
||||
dnnl::memory::format_tag filter_format_;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> src_mem_from_;
|
||||
std::unique_ptr<mkldnn::memory> src_mem_to_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_from_;
|
||||
std::unique_ptr<dnnl::memory> src_mem_to_;
|
||||
|
||||
size_t src_size_;
|
||||
size_t filter_size_;
|
||||
size_t dst_size_;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> src_mem_;
|
||||
std::unique_ptr<mkldnn::memory> filter_mem_;
|
||||
std::unique_ptr<mkldnn::memory> bias_mem_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_;
|
||||
std::unique_ptr<dnnl::memory> filter_mem_;
|
||||
std::unique_ptr<dnnl::memory> bias_mem_;
|
||||
|
||||
std::unique_ptr<mkldnn::convolution_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<dnnl::convolution_forward::desc> fwd_desc_;
|
||||
|
||||
std::unique_ptr<mkldnn::memory::desc> src_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> filter_md_;
|
||||
std::unique_ptr<mkldnn::memory::desc> bias_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> filter_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> bias_md_;
|
||||
|
||||
std::unique_ptr<mkldnn::convolution_forward::primitive_desc> conv_fwd_pd_;
|
||||
std::unique_ptr<mkldnn::primitive> conv_fwd_;
|
||||
std::unique_ptr<dnnl::convolution_forward::primitive_desc> conv_fwd_pd_;
|
||||
std::unique_ptr<dnnl::primitive> conv_fwd_;
|
||||
|
||||
private:
|
||||
IAllocatorUniquePtr<void> src_reorder_buffer_;
|
||||
|
@@ -683,7 +683,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<mkldnn::stream> stream_;
|
||||
std::unique_ptr<dnnl::stream> stream_;
|
||||
std::vector<int64_t> kernel_shape_; // must use ComputeKernelShape(...), instead of kernel_shape_
|
||||
AutoPadType auto_pad_;
|
||||
int64_t group_;
|
||||
|
@@ -695,5 +695,5 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
float alpha_;
|
||||
float epsilon_ = 1e-5f; // attribute of fused batchnorm.
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
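The Conv-BatchNormalization kernel above prepares a single convolution whose weights and bias already absorb the batch-norm statistics (the per-channel weight scaling and the fused bias built in its ReorderWeights). The folding arithmetic is the standard identity BN(Wx + b) = (alpha*W)x + alpha*(b - mean) + beta with alpha = gamma / sqrt(var + eps); the helper below is an illustrative sketch of that step, not the kernel's actual code.

// Sketch: fold BatchNormalization parameters into the preceding Conv's weights and bias.
#include <cmath>
#include <cstddef>
#include <vector>

void FoldBatchNormIntoConv(std::vector<float>& weights,       // [OC, IC, kH, kW] flattened
                           std::vector<float>& bias,          // [OC]; zero-filled if Conv had none
                           const std::vector<float>& scale,   // BN gamma, [OC]
                           const std::vector<float>& shift,   // BN beta, [OC]
                           const std::vector<float>& mean,    // BN running mean, [OC]
                           const std::vector<float>& var,     // BN running variance, [OC]
                           float epsilon = 1e-5f) {
  const size_t oc = bias.size();
  const size_t per_channel = weights.size() / oc;
  for (size_t c = 0; c < oc; ++c) {
    // BN(x) = gamma * (x - mean) / sqrt(var + eps) + beta
    const float alpha = scale[c] / std::sqrt(var[c] + epsilon);
    for (size_t i = 0; i < per_channel; ++i)
      weights[c * per_channel + i] *= alpha;           // w' = alpha * w
    bias[c] = alpha * (bias[c] - mean[c]) + shift[c];  // b' = alpha * (b - mean) + beta
  }
}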
@@ -4,21 +4,21 @@
|
|||
#pragma warning(disable : 4505) //Unreferenced local function has been removed
|
||||
#endif
|
||||
|
||||
#include "mkldnn_func_kernel.h"
|
||||
#include "dnnl_func_kernel.h"
|
||||
#include "core/common/exceptions.h"
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
#include "core/providers/mkldnn/mkldnn_common.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_conv.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_batchnorm.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_conv_batchnorm.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_activations.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_pool.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_sum.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_lrn.h"
|
||||
#include "core/providers/dnnl/dnnl_common.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_conv.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_batchnorm.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_conv_batchnorm.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_activations.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_pool.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_sum.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_lrn.h"
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
namespace {
|
||||
template <typename T>
|
||||
|
@@ -28,7 +28,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
OrtKernelContext* context,
|
||||
const SubgraphParams& params)
|
||||
: cpu_engine_(GetEngine()) {
|
||||
context_.stream = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine_));
|
||||
context_.stream = onnxruntime::make_unique<dnnl::stream>(dnnl::stream(cpu_engine_));
|
||||
|
||||
if (context_.net.size() == 0) {
|
||||
CreateKernels(params);
|
||||
|
@@ -59,124 +59,124 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
|
||||
private:
|
||||
void CreateKernels(const SubgraphParams& params) {
|
||||
for (const auto& mkldnn_node : params.subgraph->mkldnn_nodes) {
|
||||
if (mkldnn_node.name == "Conv") {
|
||||
for (const auto& dnnl_node : params.subgraph->dnnl_nodes) {
|
||||
if (dnnl_node.name == "Conv") {
|
||||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConv<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnConv<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "Conv-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlConv<T>> kernel;
|
||||
kernel = std::make_shared<DnnlConv<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "Conv-Relu") {
|
||||
} else if (dnnl_node.name == "Conv-Relu") {
|
||||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConv<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnConv<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
os << "Conv-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlConv<T>> kernel;
|
||||
kernel = std::make_shared<DnnlConv<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
kernel->fuse_relu_ = true;
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "Relu") {
|
||||
} else if (dnnl_node.name == "Relu") {
|
||||
std::ostringstream os;
|
||||
os << "Relu-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnRelu<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnRelu<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "Relu-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlRelu<T>> kernel;
|
||||
kernel = std::make_shared<DnnlRelu<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "BatchNormalization") {
|
||||
} else if (dnnl_node.name == "BatchNormalization") {
|
||||
std::ostringstream os;
|
||||
os << "BatchNormalization-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "BatchNormalization-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<DnnlBatchNorm<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "BatchNormalization-Relu") {
|
||||
} else if (dnnl_node.name == "BatchNormalization-Relu") {
|
||||
std::ostringstream os;
|
||||
os << "BatchNormalization-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
os << "BatchNormalization-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<DnnlBatchNorm<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
kernel->fuse_relu_ = true;
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "Conv-BatchNormalization") {
|
||||
} else if (dnnl_node.name == "Conv-BatchNormalization") {
|
||||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConvBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnConvBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "Conv-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlConvBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<DnnlConvBatchNorm<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "Conv-BatchNormalization-Relu") {
|
||||
} else if (dnnl_node.name == "Conv-BatchNormalization-Relu") {
|
||||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConvBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnConvBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
os << "Conv-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlConvBatchNorm<T>> kernel;
|
||||
kernel = std::make_shared<DnnlConvBatchNorm<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
kernel->fuse_relu_ = true;
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "MaxPool") {
|
||||
} else if (dnnl_node.name == "MaxPool") {
|
||||
std::ostringstream os;
|
||||
os << "MaxPool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "MaxPool-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlPool<T>> kernel;
|
||||
kernel = std::make_shared<DnnlPool<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "GlobalMaxPool") {
|
||||
} else if (dnnl_node.name == "GlobalMaxPool") {
|
||||
std::ostringstream os;
|
||||
os << "GlobalMaxPool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "GlobalMaxPool-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlPool<T>> kernel;
|
||||
kernel = std::make_shared<DnnlPool<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "AveragePool") {
|
||||
} else if (dnnl_node.name == "AveragePool") {
|
||||
std::ostringstream os;
|
||||
os << "AveragePool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "AveragePool-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlPool<T>> kernel;
|
||||
kernel = std::make_shared<DnnlPool<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "GlobalAveragePool") {
|
||||
} else if (dnnl_node.name == "GlobalAveragePool") {
|
||||
std::ostringstream os;
|
||||
os << "GlobalAveragePool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "GlobalAveragePool-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlPool<T>> kernel;
|
||||
kernel = std::make_shared<DnnlPool<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "LRN") {
|
||||
} else if (dnnl_node.name == "LRN") {
|
||||
std::ostringstream os;
|
||||
os << "LRN-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnLrn<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnLrn<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "LRN-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlLrn<T>> kernel;
|
||||
kernel = std::make_shared<DnnlLrn<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
} else if (mkldnn_node.name == "Sum") {
|
||||
} else if (dnnl_node.name == "Sum") {
|
||||
std::ostringstream os;
|
||||
os << "Sum-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnSum<T>> kernel;
|
||||
kernel = std::make_shared<MklDnnSum<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
os << "Sum-" << dnnl_node.node_index << "-";
|
||||
std::shared_ptr<DnnlSum<T>> kernel;
|
||||
kernel = std::make_shared<DnnlSum<T>>(dnnl_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : dnnl_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
context_.kernels.push_back(kernel);
|
||||
|
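The chain above creates one DnnlKernel subclass per fused node name, wires up parent kernels, and optionally sets fuse_relu_. A table of factory lambdas is a compact way to picture the same dispatch; the types below are hypothetical stand-ins for illustration only, not the ORT classes.

// Sketch: name-to-factory dispatch equivalent to the if/else chain above.
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>

struct KernelBase { virtual ~KernelBase() = default; bool fuse_relu_ = false; };
struct ConvKernel : KernelBase {};
struct ReluKernel : KernelBase {};

std::shared_ptr<KernelBase> CreateKernelFor(const std::string& op_name) {
  using Factory = std::function<std::shared_ptr<KernelBase>()>;
  static const std::unordered_map<std::string, Factory> factories = {
      {"Conv", [] { return std::make_shared<ConvKernel>(); }},
      {"Conv-Relu", [] {
         auto k = std::make_shared<ConvKernel>();
         k->fuse_relu_ = true;  // fused ReLU handled via post-ops inside the kernel
         return k;
       }},
      {"Relu", [] { return std::make_shared<ReluKernel>(); }},
  };
  auto it = factories.find(op_name);
  return it == factories.end() ? nullptr : it->second();
}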
@@ -185,16 +185,16 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
}
|
||||
|
||||
struct SubgraphContext {
|
||||
std::unique_ptr<mkldnn::stream> stream;
|
||||
std::vector<mkldnn::primitive> net;
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>> net_args;
|
||||
std::vector<std::shared_ptr<MklDnnKernel>> kernels;
|
||||
std::unique_ptr<dnnl::stream> stream;
|
||||
std::vector<dnnl::primitive> net;
|
||||
std::vector<std::unordered_map<int, dnnl::memory>> net_args;
|
||||
std::vector<std::shared_ptr<DnnlKernel>> kernels;
|
||||
|
||||
SubgraphContext() : stream(nullptr) {}
|
||||
};
|
||||
|
||||
void Initialize(const OrtCustomOpApi* api, OrtKernelContext* context) {
|
||||
// Propagate mkldnn block format
|
||||
// Propagate Dnnl block format
|
||||
// dst format of current node to src format of next node
|
||||
for (auto& kernel : context_.kernels) {
|
||||
kernel->CreatePrimitives(api, context, cpu_engine_, context_.net, context_.net_args);
|
||||
|
@@ -205,10 +205,10 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
}
|
||||
|
||||
SubgraphContext context_;
|
||||
mkldnn::engine& cpu_engine_;
|
||||
dnnl::engine& cpu_engine_;
|
||||
};
|
||||
|
||||
// Pool which allows for reuse of MKLDNN Conv primitives which are expensive to instantiate.
|
||||
// Pool which allows for reuse of DNNL Conv primitives which are expensive to instantiate.
|
||||
// To address thread safety, the primitives are stored in a map on thread local storage.
|
||||
template <typename T>
|
||||
class SubgraphPrimitivePool : public PrimitivePool<T> {
|
||||
|
@@ -218,7 +218,7 @@ class SubgraphPrimitivePool : public PrimitivePool<T> {
|
|||
const SubgraphParams& params) {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
std::string dims_str;
|
||||
for (auto i = 0; i < params.subgraph->mkldnn_nodes[0].num_inputs; i++) {
|
||||
for (auto i = 0; i < params.subgraph->dnnl_nodes[0].num_inputs; i++) {
|
||||
const OrtValue* input_tensor = ort.KernelContext_GetInput(context, i);
|
||||
auto tensor_info = ort.GetTensorTypeAndShape(input_tensor);
|
||||
auto tensor_shape = ort.GetTensorShape(tensor_info);
|
||||
|
@@ -227,7 +227,7 @@ class SubgraphPrimitivePool : public PrimitivePool<T> {
|
|||
auto dim = tensor_shape.size();
|
||||
|
||||
TensorShape x_shape(shape, dim);
|
||||
mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
dnnl::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
AddDimsToKey(dims_str, src_dims);
|
||||
}
|
||||
|
||||
|
@@ -254,20 +254,20 @@ class SubgraphPrimitivePool : public PrimitivePool<T> {
|
|||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
Status MkldnnFuncKernel<T>::Compute(const OrtCustomOpApi* api, OrtKernelContext* context) const {
|
||||
Status DnnlFuncKernel<T>::Compute(const OrtCustomOpApi* api, OrtKernelContext* context) const {
|
||||
Status status;
|
||||
try {
|
||||
SubgraphPrimitive<T>* primitive = SubgraphPrimitivePool<T>::Get(api, context, params_);
|
||||
primitive->UpdateProvider(params_);
|
||||
status = primitive->Compute(api, context);
|
||||
} catch (const mkldnn::error& e) {
|
||||
} catch (const dnnl::error& e) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Status: ", e.status,
|
||||
", message: ", e.what());
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
template class MkldnnFuncKernel<float>;
|
||||
template class DnnlFuncKernel<float>;
|
||||
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
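SubgraphPrimitive keeps two parallel vectors, context_.net (the primitives) and context_.net_args (one argument map per primitive); execution then amounts to walking the two on a single dnnl::stream. A minimal sketch of that loop, assuming DNNL 1.x; the function name is illustrative.

// Sketch: run a prepared chain of DNNL primitives with their argument maps.
#include <unordered_map>
#include <vector>
#include "dnnl.hpp"

void RunNet(dnnl::stream& strm,
            const std::vector<dnnl::primitive>& net,
            const std::vector<std::unordered_map<int, dnnl::memory>>& net_args) {
  for (size_t i = 0; i < net.size(); ++i)
    net[i].execute(strm, net_args[i]);  // e.g. {DNNL_ARG_SRC, ...}, {DNNL_ARG_DST, ...}
  strm.wait();  // block until every primitive in the chain has finished
}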
@@ -3,18 +3,18 @@
|
|||
#pragma once
|
||||
|
||||
#include "core/graph/onnx_protobuf.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#include "core/session/onnxruntime_c_api.h"
|
||||
#include "core/framework/func_api.h"
|
||||
#include "mkldnn_kernel.h"
|
||||
#include "dnnl_kernel.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
namespace {
|
||||
struct SubgraphParams {
|
||||
NodeAttributes attributes;
|
||||
MKLDNNExecutionProvider* provider;
|
||||
DNNLExecutionProvider* provider;
|
||||
std::shared_ptr<Subgraph> subgraph;
|
||||
std::string subgraph_id;
|
||||
std::string subgraph_key;
|
||||
|
@@ -24,11 +24,11 @@ struct SubgraphParams {
|
|||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
class MkldnnFuncKernel {
|
||||
class DnnlFuncKernel {
|
||||
public:
|
||||
explicit MkldnnFuncKernel(const ComputeContext* context,
|
||||
explicit DnnlFuncKernel(const ComputeContext* context,
|
||||
const NodeAttributes& attributes,
|
||||
MKLDNNExecutionProvider* provider) {
|
||||
DNNLExecutionProvider* provider) {
|
||||
ORT_UNUSED_PARAMETER(context);
|
||||
|
||||
params_.provider = provider;
|
||||
|
@@ -37,28 +37,28 @@ class MkldnnFuncKernel {
|
|||
auto sub_it = attributes.find("subgraph_id");
|
||||
if (sub_it->second.type() == ONNX_NAMESPACE::AttributeProto_AttributeType::AttributeProto_AttributeType_STRING) {
|
||||
params_.subgraph_id = sub_it->second.s();
|
||||
params_.subgraph = provider->GetMklDnnSubgraph(params_.subgraph_id);
|
||||
params_.subgraph = provider->GetDnnlSubgraph(params_.subgraph_id);
|
||||
|
||||
std::ostringstream key_os;
|
||||
key_os << params_.subgraph->graph_name << "_" << params_.subgraph_id << "-";
|
||||
key_os << params_.subgraph->mkldnn_nodes.back().ToString() << "-";
|
||||
key_os << params_.subgraph->mkldnn_nodes.back().output_name;
|
||||
key_os << params_.subgraph->dnnl_nodes.back().ToString() << "-";
|
||||
key_os << params_.subgraph->dnnl_nodes.back().output_name;
|
||||
|
||||
if (params_.subgraph->mkldnn_nodes[0].name == "Conv") {
|
||||
if (params_.subgraph->dnnl_nodes[0].name == "Conv") {
|
||||
std::ostringstream os;
|
||||
os << "Conv-" << params_.subgraph->mkldnn_nodes[0].node_index << "-";
|
||||
os << "Conv-" << params_.subgraph->dnnl_nodes[0].node_index << "-";
|
||||
key_os << GetConvAttributeKey(attributes, os.str());
|
||||
}
|
||||
|
||||
if (params_.subgraph->mkldnn_nodes[0].name == "LRN") {
|
||||
if (params_.subgraph->dnnl_nodes[0].name == "LRN") {
|
||||
std::ostringstream os;
|
||||
os << "LRN-" << params_.subgraph->mkldnn_nodes[0].node_index << "-";
|
||||
os << "LRN-" << params_.subgraph->dnnl_nodes[0].node_index << "-";
|
||||
key_os << GetLrnAttributeKey(attributes, os.str());
|
||||
}
|
||||
|
||||
if (params_.subgraph->mkldnn_nodes[0].name.find("Pool") != std::string::npos) {
|
||||
if (params_.subgraph->dnnl_nodes[0].name.find("Pool") != std::string::npos) {
|
||||
std::ostringstream os;
|
||||
os << params_.subgraph->mkldnn_nodes[0].name << "-" << params_.subgraph->mkldnn_nodes[0].node_index << "-";
|
||||
os << params_.subgraph->dnnl_nodes[0].name << "-" << params_.subgraph->dnnl_nodes[0].node_index << "-";
|
||||
key_os << GetPoolAttributesKey(attributes, os.str());
|
||||
}
|
||||
|
||||
|
@@ -231,5 +231,5 @@ class MkldnnFuncKernel {
|
|||
private:
|
||||
SubgraphParams params_;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
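The subgraph key assembled above (graph name, subgraph id, per-op attributes, and later the input dims added by AddDimsToKey) exists so that SubgraphPrimitivePool can reuse a fully built primitive chain whenever the same shapes recur. The sketch below shows the general shape-keyed, thread-local cache idea with hypothetical helpers; it is not the PrimitivePool implementation.

// Sketch: cache expensive objects per input-shape key, one cache per thread.
#include <functional>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include "dnnl.hpp"

inline std::string DimsKey(const dnnl::memory::dims& dims) {
  std::ostringstream os;
  for (auto d : dims) os << d << "x";  // e.g. "1x3x224x224x"
  return os.str();
}

template <typename Primitive>
Primitive* GetOrCreate(const std::string& key,
                       const std::function<std::unique_ptr<Primitive>()>& make) {
  // Thread-local storage avoids locking and concurrent use of one primitive.
  thread_local std::unordered_map<std::string, std::unique_ptr<Primitive>> cache;
  auto it = cache.find(key);
  if (it == cache.end())
    it = cache.emplace(key, make()).first;
  return it->second.get();
}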
@@ -0,0 +1,61 @@
|
|||
// Copyright(C) 2019 Intel Corporation
|
||||
// Licensed under the MIT License
|
||||
|
||||
#include "dnnl_kernel.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace ort_dnnl {
|
||||
|
||||
void DnnlKernel::InitDstReorderOutput(dnnl::engine& cpu_engine,
|
||||
dnnl::memory::data_type& data_type,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) {
|
||||
// Allocate dst buffer if reorder is necessary
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder to ONNXRuntime format
|
||||
dnnl::memory::dims dst_dims_mkl(
|
||||
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
dnnl::memory::desc dst_des = dnnl::memory::desc(dst_dims_mkl,
|
||||
data_type, ort_source_format_);
|
||||
reorder_dst_mem_to_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(dst_des, cpu_engine));
|
||||
net.push_back(dnnl::reorder(*primitive_dst_mem_, *reorder_dst_mem_to_));
|
||||
net_args.push_back({{DNNL_ARG_FROM, *primitive_dst_mem_},
|
||||
{DNNL_ARG_TO, *reorder_dst_mem_to_}});
|
||||
}
|
||||
}
|
||||
|
||||
dnnl::memory::format_tag DnnlKernel::GetSourceFormat(int dim_size) {
|
||||
dnnl::memory::format_tag source_format = dnnl::memory::format_tag::any;
|
||||
switch (dim_size) {
|
||||
case 1: {
|
||||
source_format = dnnl::memory::format_tag::x;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
source_format = dnnl::memory::format_tag::nc;
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
source_format = dnnl::memory::format_tag::ntc;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
source_format = dnnl::memory::format_tag::nchw;
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
source_format = dnnl::memory::format_tag::ncdhw;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
source_format = dnnl::memory::format_tag::any;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return source_format;
|
||||
}
|
||||
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
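InitDstReorderOutput above appends one extra reorder when the primitive's chosen destination layout differs from the plain ONNX Runtime layout returned by GetSourceFormat (x/nc/ntc/nchw/ncdhw by rank). The standalone sketch below shows that final reorder in isolation, assuming DNNL 1.x and f32 data; the helper name and memory management are illustrative.

// Sketch: convert a possibly blocked result back to a plain layout.
#include "dnnl.hpp"

dnnl::memory ToPlainLayout(const dnnl::engine& eng, dnnl::stream& strm,
                           dnnl::memory& blocked,
                           const dnnl::memory::dims& dims,
                           dnnl::memory::format_tag plain_tag) {
  dnnl::memory::desc plain_md(dims, dnnl::memory::data_type::f32, plain_tag);
  if (plain_md == blocked.get_desc())
    return blocked;  // already plain, no reorder needed
  dnnl::memory plain(plain_md, eng);
  dnnl::reorder(blocked, plain).execute(strm, blocked, plain);
  strm.wait();
  return plain;
}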
@@ -6,41 +6,41 @@
|
|||
#pragma warning(disable : 4244)
|
||||
#endif
|
||||
|
||||
#include "mkldnn.hpp"
|
||||
#include "dnnl.hpp"
|
||||
#include "core/common/cpuid_info.h"
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
#include "core/providers/mkldnn/subgraph/subgraph.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/dnnl/subgraph/subgraph.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
class MklDnnKernel {
|
||||
class DnnlKernel {
|
||||
public:
|
||||
MklDnnKernel(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider) {
|
||||
DnnlKernel(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider) {
|
||||
name_ = node.name;
|
||||
mklnode_ptr_ = std::make_shared<MklDnnNode>(node);
|
||||
mklnode_ptr_ = std::make_shared<DnnlNode>(node);
|
||||
provider_ = provider;
|
||||
alloc_ = provider_->GetAllocator(0, OrtMemTypeDefault);
|
||||
}
|
||||
virtual ~MklDnnKernel(){};
|
||||
virtual ~DnnlKernel(){};
|
||||
|
||||
virtual void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) = 0;
|
||||
dnnl::engine& cpu_engine,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) = 0;
|
||||
|
||||
virtual void ReorderWeights(const OrtCustomOpApi* api, OrtKernelContext* context, mkldnn::engine& cpu_engine) {
|
||||
virtual void ReorderWeights(const OrtCustomOpApi* api, OrtKernelContext* context, dnnl::engine& cpu_engine) {
|
||||
ORT_UNUSED_PARAMETER(api);
|
||||
ORT_UNUSED_PARAMETER(context);
|
||||
ORT_UNUSED_PARAMETER(cpu_engine);
|
||||
}
|
||||
virtual void SetProvider(MKLDNNExecutionProvider* provider) {
|
||||
virtual void SetProvider(DNNLExecutionProvider* provider) {
|
||||
provider_ = provider;
|
||||
}
|
||||
MKLDNNExecutionProvider* GetProvider() {
|
||||
DNNLExecutionProvider* GetProvider() {
|
||||
return provider_;
|
||||
}
|
||||
|
||||
|
@ -79,43 +79,43 @@ class MklDnnKernel {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
void InitDstReorderOutput(mkldnn::engine& cpu_engine,
|
||||
mkldnn::memory::data_type& data_type,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args);
|
||||
void InitDstReorderOutput(dnnl::engine& cpu_engine,
|
||||
dnnl::memory::data_type& data_type,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args);
|
||||
|
||||
mkldnn::memory::format_tag GetSourceFormat(int dim_size);
|
||||
dnnl::memory::format_tag GetSourceFormat(int dim_size);
|
||||
|
||||
public:
|
||||
std::string name_;
|
||||
std::vector<std::shared_ptr<MklDnnKernel>> parents_;
|
||||
std::vector<std::shared_ptr<DnnlKernel>> parents_;
|
||||
bool fuse_relu_ = false;
|
||||
bool fuse_sum_ = false;
|
||||
std::shared_ptr<mkldnn::memory> primitive_dst_mem_;
|
||||
std::unique_ptr<mkldnn::memory::desc> primitive_dst_md_;
|
||||
std::shared_ptr<dnnl::memory> primitive_dst_mem_;
|
||||
std::unique_ptr<dnnl::memory::desc> primitive_dst_md_;
|
||||
TensorShape primitive_dst_shape_;
|
||||
mkldnn::memory::desc primitive_dst_desc_;
|
||||
dnnl::memory::desc primitive_dst_desc_;
|
||||
// ONNX Runtime format
|
||||
mkldnn::memory::format_tag ort_source_format_;
|
||||
mkldnn::memory::desc ort_source_desc_;
|
||||
dnnl::memory::format_tag ort_source_format_;
|
||||
dnnl::memory::desc ort_source_desc_;
|
||||
|
||||
// input format.
|
||||
// It can be ORT format (nchw) or blocked memory format from parent node
|
||||
// mkldnn::memory::format_tag source_format_ = mkldnn::memory::format_tag::any;
|
||||
mkldnn::memory::desc source_desc_ = mkldnn::memory::desc();
|
||||
// dnnl::memory::format_tag source_format_ = dnnl::memory::format_tag::any;
|
||||
dnnl::memory::desc source_desc_ = dnnl::memory::desc();
|
||||
Status primitive_created_status_;
|
||||
|
||||
protected:
|
||||
// Pointer to MklNode of subgraph IR
|
||||
std::shared_ptr<MklDnnNode> mklnode_ptr_;
|
||||
std::shared_ptr<DnnlNode> mklnode_ptr_;
|
||||
// input format expected by primitive object
|
||||
mkldnn::memory::desc primitive_src_desc_;
|
||||
dnnl::memory::desc primitive_src_desc_;
|
||||
|
||||
// memory used for reorders
|
||||
std::unique_ptr<mkldnn::memory> reorder_dst_mem_to_;
|
||||
std::unique_ptr<dnnl::memory> reorder_dst_mem_to_;
|
||||
AllocatorPtr alloc_;
|
||||
MKLDNNExecutionProvider* provider_;
|
||||
DNNLExecutionProvider* provider_;
|
||||
};
|
||||
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -5,28 +5,28 @@
|
|||
#include "core/util/math.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_kernel.h"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_kernel.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
template <typename T>
|
||||
class MklDnnLrn : public MklDnnKernel {
|
||||
class DnnlLrn : public DnnlKernel {
|
||||
public:
|
||||
MklDnnLrn(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider,
|
||||
DnnlLrn(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider,
|
||||
const NodeAttributes& attributes,
|
||||
const std::string attributes_prefix = "") : MklDnnKernel(node, provider) {
|
||||
const std::string attributes_prefix = "") : DnnlKernel(node, provider) {
|
||||
ReadAttributes(attributes, attributes_prefix);
|
||||
}
|
||||
|
||||
void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
dnnl::engine& cpu_engine,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
|
@ -42,18 +42,18 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
ort_source_format_ = GetSourceFormat(static_cast<int>(xdim));
|
||||
x_shape = TensorShape(xshape, xdim);
|
||||
|
||||
mkldnn::memory::dims src_dims(
|
||||
dnnl::memory::dims src_dims(
|
||||
x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
|
||||
ort_source_desc_ = mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(*src_md_, cpu_engine, nullptr));
|
||||
ort_source_desc_ = dnnl::memory::desc(
|
||||
{src_dims}, DnnnType<T>(), ort_source_format_);
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({src_dims}, DnnnType<T>(), ort_source_format_));
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(*src_md_, cpu_engine, nullptr));
|
||||
} else {
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
|
@ -63,13 +63,13 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
|
||||
primitive_dst_shape_ = TensorShape(x_shape);
|
||||
|
||||
mkldnn::algorithm algo = mkldnn::algorithm::lrn_across_channels;
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::lrn_forward::desc>(
|
||||
mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, algo, *src_md_,
|
||||
dnnl::algorithm algo = dnnl::algorithm::lrn_across_channels;
|
||||
fwd_desc_ = onnxruntime::make_unique<dnnl::lrn_forward::desc>(
|
||||
dnnl::lrn_forward::desc(dnnl::prop_kind::forward_scoring, algo, *src_md_,
|
||||
size_, alpha_, beta_, bias_));
|
||||
|
||||
fwd_primitive_desc_ = onnxruntime::make_unique<mkldnn::lrn_forward::primitive_desc>(
|
||||
mkldnn::lrn_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
fwd_primitive_desc_ = onnxruntime::make_unique<dnnl::lrn_forward::primitive_desc>(
|
||||
dnnl::lrn_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
|
||||
primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc();
|
||||
primitive_dst_desc_ = fwd_primitive_desc_.get()->dst_desc();
|
||||
|
@ -79,30 +79,30 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder needed. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// Intermediate node. Use Dnnl kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
lrn_fwd_ = onnxruntime::make_unique<mkldnn::lrn_forward>(
|
||||
mkldnn::lrn_forward(*fwd_primitive_desc_));
|
||||
lrn_fwd_ = onnxruntime::make_unique<dnnl::lrn_forward>(
|
||||
dnnl::lrn_forward(*fwd_primitive_desc_));
|
||||
net.push_back(*lrn_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{DNNL_ARG_SRC, *src_mem_},
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}});
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
// reorder is necessary
|
||||
mkldnn::memory::data_type t = MklDnnType<T>();
|
||||
dnnl::memory::data_type t = DnnnType<T>();
|
||||
InitDstReorderOutput(cpu_engine, t, net, net_args);
|
||||
}
|
||||
}
|
||||
|
@ -172,13 +172,13 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
int size_ = 0;
|
||||
|
||||
private:
|
||||
std::shared_ptr<mkldnn::memory> src_mem_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_;
|
||||
|
||||
std::unique_ptr<mkldnn::lrn_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<mkldnn::lrn_forward::primitive_desc> fwd_primitive_desc_;
|
||||
std::unique_ptr<mkldnn::primitive> lrn_fwd_;
|
||||
std::unique_ptr<dnnl::lrn_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<dnnl::lrn_forward::primitive_desc> fwd_primitive_desc_;
|
||||
std::unique_ptr<dnnl::primitive> lrn_fwd_;
|
||||
|
||||
std::unique_ptr<mkldnn::memory::desc> src_md_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_md_;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -2,29 +2,29 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
#include "core/providers/cpu/nn/autopad_type.h"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_kernel.h"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_kernel.h"
|
||||
#include "core/util/math.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
template <typename T>
|
||||
class MklDnnPool : public MklDnnKernel {
|
||||
class DnnlPool : public DnnlKernel {
|
||||
public:
|
||||
MklDnnPool(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider,
|
||||
DnnlPool(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider,
|
||||
const NodeAttributes& attributes,
|
||||
const std::string attributes_prefix = "") : MklDnnKernel(node, provider) {
|
||||
const std::string attributes_prefix = "") : DnnlKernel(node, provider) {
|
||||
op_name_ = node.name;
|
||||
ReadAttributes(attributes, attributes_prefix);
|
||||
}
|
||||
|
||||
void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine, std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
dnnl::engine& cpu_engine, std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
|
@ -36,26 +36,26 @@ class MklDnnPool : public MklDnnKernel {
|
|||
auto xshape = tensor_shape.data();
|
||||
auto xdim = tensor_shape.size();
|
||||
|
||||
mkldnn::memory::dims dims(xdim);
|
||||
dnnl::memory::dims dims(xdim);
|
||||
x_shape_ = TensorShape(xshape, xdim);
|
||||
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
|
||||
dnnl::memory::dims src_dims_mkl(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
|
||||
ort_source_format_ = GetSourceFormat(static_cast<int>(xdim));
|
||||
// ort_source_desc is the format of ONNX Runtime tensor format
|
||||
ort_source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), ort_source_format_);
|
||||
ort_source_desc_ = dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), ort_source_format_);
|
||||
// source_desc is propagating format. input to this op.
|
||||
source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), ort_source_format_);
|
||||
source_desc_ = dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), ort_source_format_);
|
||||
|
||||
// reorder for better performance
|
||||
mkldnn::memory::format_tag src_format = GetAVXFormat(src_dims_mkl);
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format));
|
||||
dnnl::memory::format_tag src_format = GetAVXFormat(src_dims_mkl);
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), src_format));
|
||||
} else {
|
||||
// get the output of previous node (mkldnn block propagation).
|
||||
// get the output of previous node (Dnnl block propagation).
|
||||
// TODO: Source node will set src of this node.
|
||||
x_shape_ = parents_[0].get()->primitive_dst_shape_;
|
||||
source_desc_ = parents_[0].get()->primitive_dst_desc_;
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
|
||||
dnnl::memory::dims src_dims_mkl(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
|
||||
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
ort_source_desc_ = parents_[0].get()->ort_source_desc_;
|
||||
|
@ -63,12 +63,12 @@ class MklDnnPool : public MklDnnKernel {
|
|||
|
||||
if (source_desc_ == ort_source_desc_) {
|
||||
// reorder for better performance
|
||||
mkldnn::memory::format_tag fmt = GetAVXFormat(src_dims_mkl);
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), fmt));
|
||||
dnnl::memory::format_tag fmt = GetAVXFormat(src_dims_mkl);
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({src_dims_mkl}, DnnnType<T>(), fmt));
|
||||
} else {
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc()));
|
||||
src_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -78,7 +78,7 @@ class MklDnnPool : public MklDnnKernel {
|
|||
|
||||
if (x_shape_.NumDimensions() <= 3) {
|
||||
primitive_created_status_ = ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
|
||||
"1D Pooling is not supported by MKLDNN.");
|
||||
"1D Pooling is not supported by DNNL.");
|
||||
}
|
||||
|
||||
if (global_pooling_) {
|
||||
|
@ -87,35 +87,35 @@ class MklDnnPool : public MklDnnKernel {
|
|||
strides_.assign(kernel_shape_.size(), 1);
|
||||
}
|
||||
|
||||
mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
mkldnn::memory::dims kernel_mkl(kernel_shape_.begin(), kernel_shape_.end());
|
||||
mkldnn::memory::dims strides_mkl(strides_.begin(), strides_.end());
|
||||
mkldnn::memory::dims padding_left_mkl(pads_.begin(), pads_.begin() + (pads_.size() / 2));
|
||||
mkldnn::memory::dims padding_right_mkl(pads_.begin() + (pads_.size() / 2), pads_.end());
|
||||
dnnl::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
dnnl::memory::dims kernel_mkl(kernel_shape_.begin(), kernel_shape_.end());
|
||||
dnnl::memory::dims strides_mkl(strides_.begin(), strides_.end());
|
||||
dnnl::memory::dims padding_left_mkl(pads_.begin(), pads_.begin() + (pads_.size() / 2));
|
||||
dnnl::memory::dims padding_right_mkl(pads_.begin() + (pads_.size() / 2), pads_.end());
|
||||
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({dst_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
|
||||
mkldnn::algorithm algo = mkldnn::algorithm::pooling_max;
|
||||
dnnl::algorithm algo = dnnl::algorithm::pooling_max;
|
||||
if (op_name_ == "AveragePool" || op_name_ == "GlobalAveragePool") {
|
||||
algo = mkldnn::algorithm::pooling_avg_exclude_padding;
|
||||
algo = dnnl::algorithm::pooling_avg_exclude_padding;
|
||||
if (count_include_pad_) {
|
||||
algo = mkldnn::algorithm::pooling_avg_include_padding;
|
||||
algo = dnnl::algorithm::pooling_avg_include_padding;
|
||||
}
|
||||
}
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::pooling_forward::desc>(
|
||||
mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_inference, algo,
|
||||
fwd_desc_ = onnxruntime::make_unique<dnnl::pooling_forward::desc>(
|
||||
dnnl::pooling_forward::desc(dnnl::prop_kind::forward_inference, algo,
|
||||
*src_md_, *primitive_dst_md_,
|
||||
strides_mkl, kernel_mkl,
|
||||
padding_left_mkl, padding_right_mkl));
|
||||
|
||||
fwd_primitive_desc_ = onnxruntime::make_unique<mkldnn::pooling_forward::primitive_desc>(
|
||||
mkldnn::pooling_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
fwd_primitive_desc_ = onnxruntime::make_unique<dnnl::pooling_forward::primitive_desc>(
|
||||
dnnl::pooling_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
// Sub-graph's first node. Read input from input buffer
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
// Sub-graph's inner node. set input to parent's output
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
|
@ -129,24 +129,24 @@ class MklDnnPool : public MklDnnKernel {
|
|||
|
||||
// reorder source memory for best performance (AVX512);
|
||||
if (primitive_src_desc_ != source_desc_) {
|
||||
mkldnn::memory::dims src_dims(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
|
||||
auto pd = mkldnn::memory::desc(source_desc_);
|
||||
dnnl::memory::dims src_dims(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
|
||||
auto pd = dnnl::memory::desc(source_desc_);
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty())
|
||||
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
src_mem_from_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(pd, cpu_engine, nullptr));
|
||||
else
|
||||
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
|
||||
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
|
||||
{MKLDNN_ARG_TO, *src_mem_}});
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(dnnl::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{DNNL_ARG_FROM, *src_mem_from_},
|
||||
{DNNL_ARG_TO, *src_mem_}});
|
||||
} else {
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
|
@ -157,29 +157,29 @@ class MklDnnPool : public MklDnnKernel {
|
|||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder needed. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// Intermediate node. Use Dnnl kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
pool_fwd_ = onnxruntime::make_unique<mkldnn::pooling_forward>(
|
||||
mkldnn::pooling_forward(*fwd_primitive_desc_));
|
||||
pool_fwd_ = onnxruntime::make_unique<dnnl::pooling_forward>(
|
||||
dnnl::pooling_forward(*fwd_primitive_desc_));
|
||||
|
||||
net.push_back(*pool_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{DNNL_ARG_SRC, *src_mem_},
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}});
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
// reorder is necessary
|
||||
mkldnn::memory::data_type t = MklDnnType<T>();
|
||||
dnnl::memory::data_type t = DnnnType<T>();
|
||||
InitDstReorderOutput(cpu_engine, t, net, net_args);
|
||||
}
|
||||
}
|
||||
|
@ -298,29 +298,29 @@ class MklDnnPool : public MklDnnKernel {
|
|||
size_t src_size_;
|
||||
size_t dst_size_;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> src_mem_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_;
|
||||
|
||||
std::unique_ptr<mkldnn::pooling_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<mkldnn::memory::desc> src_md_;
|
||||
std::unique_ptr<mkldnn::pooling_forward::primitive_desc> fwd_primitive_desc_;
|
||||
std::unique_ptr<mkldnn::primitive> pool_fwd_;
|
||||
std::unique_ptr<dnnl::pooling_forward::desc> fwd_desc_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_md_;
|
||||
std::unique_ptr<dnnl::pooling_forward::primitive_desc> fwd_primitive_desc_;
|
||||
std::unique_ptr<dnnl::primitive> pool_fwd_;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> src_mem_from_;
|
||||
std::unique_ptr<mkldnn::memory> src_mem_to_;
|
||||
std::shared_ptr<dnnl::memory> src_mem_from_;
|
||||
std::unique_ptr<dnnl::memory> src_mem_to_;
|
||||
|
||||
std::unique_ptr<mkldnn::memory> dst_mem_from_;
|
||||
std::unique_ptr<mkldnn::memory> dst_mem_to_;
|
||||
std::unique_ptr<dnnl::memory> dst_mem_from_;
|
||||
std::unique_ptr<dnnl::memory> dst_mem_to_;
|
||||
|
||||
private:
|
||||
mkldnn::memory::format_tag GetAVXFormat(const mkldnn::memory::dims& src_dims_mkl) {
|
||||
dnnl::memory::format_tag GetAVXFormat(const dnnl::memory::dims& src_dims_mkl) {
|
||||
bool is_2D = src_dims_mkl.size() == 4 ? true : false;
|
||||
mkldnn::memory::format_tag fmt = mkldnn::memory::format_tag::any;
|
||||
dnnl::memory::format_tag fmt = dnnl::memory::format_tag::any;
|
||||
if (CPUIDInfo::GetCPUIDInfo().HasAVX512f()) {
|
||||
fmt = is_2D ? mkldnn::memory::format_tag::nChw16c : mkldnn::memory::format_tag::nCdhw16c;
|
||||
fmt = is_2D ? dnnl::memory::format_tag::nChw16c : dnnl::memory::format_tag::nCdhw16c;
|
||||
} else if (CPUIDInfo::GetCPUIDInfo().HasAVX2() && (src_dims_mkl[1] % 8 == 0)) {
|
||||
fmt = is_2D ? mkldnn::memory::format_tag::nChw8c : mkldnn::memory::format_tag::ncdhw;
|
||||
fmt = is_2D ? dnnl::memory::format_tag::nChw8c : dnnl::memory::format_tag::ncdhw;
|
||||
} else {
|
||||
fmt = is_2D ? mkldnn::memory::format_tag::nchw : mkldnn::memory::format_tag::ncdhw;
|
||||
fmt = is_2D ? dnnl::memory::format_tag::nchw : dnnl::memory::format_tag::ncdhw;
|
||||
}
|
||||
return fmt;
|
||||
}
|
||||
|
@ -413,5 +413,5 @@ class MklDnnPool : public MklDnnKernel {
|
|||
|
||||
TensorShape x_shape_;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -3,29 +3,29 @@
|
|||
|
||||
#pragma once
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/mkldnn/mkldnn_fwd.h"
|
||||
#include "core/providers/mkldnn/mkldnn_common.h"
|
||||
#include "core/providers/mkldnn/subgraph/mkldnn_kernel.h"
|
||||
#include "core/providers/dnnl/dnnl_fwd.h"
|
||||
#include "core/providers/dnnl/dnnl_common.h"
|
||||
#include "core/providers/dnnl/subgraph/dnnl_kernel.h"
|
||||
#include "core/util/math.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
template <typename T>
|
||||
class MklDnnSum : public MklDnnKernel {
|
||||
class DnnlSum : public DnnlKernel {
|
||||
public:
|
||||
explicit MklDnnSum(const MklDnnNode& node,
|
||||
MKLDNNExecutionProvider* provider,
|
||||
explicit DnnlSum(const DnnlNode& node,
|
||||
DNNLExecutionProvider* provider,
|
||||
const NodeAttributes& attributes,
|
||||
const std::string attributes_prefix = "") : MklDnnKernel(node, provider) {
|
||||
const std::string attributes_prefix = "") : DnnlKernel(node, provider) {
|
||||
ReadAttributes(attributes, attributes_prefix);
|
||||
}
|
||||
|
||||
void CreatePrimitives(const OrtCustomOpApi* api,
|
||||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
dnnl::engine& cpu_engine,
|
||||
std::vector<dnnl::primitive>& net,
|
||||
std::vector<std::unordered_map<int, dnnl::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int num_inputs = mklnode_ptr_->num_inputs;
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
@ -43,10 +43,10 @@ class MklDnnSum : public MklDnnKernel {
|
|||
ort_source_format_ = GetSourceFormat(static_cast<int>(xdim));
|
||||
|
||||
x_shape = TensorShape(xshape, xdim);
|
||||
mkldnn::memory::dims src_dims(
|
||||
dnnl::memory::dims src_dims(
|
||||
x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
ort_source_desc_ = mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
ort_source_desc_ = dnnl::memory::desc(
|
||||
{src_dims}, DnnnType<T>(), ort_source_format_);
|
||||
source_desc_ = ort_source_desc_;
|
||||
} else {
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
|
@ -56,7 +56,7 @@ class MklDnnSum : public MklDnnKernel {
|
|||
}
|
||||
primitive_dst_shape_ = TensorShape(x_shape);
|
||||
|
||||
mkldnn::memory::dims dst_dims_mkl(
|
||||
dnnl::memory::dims dst_dims_mkl(
|
||||
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
|
||||
for (int i = 0; i < num_inputs; i++) {
|
||||
|
@ -69,19 +69,19 @@ class MklDnnSum : public MklDnnKernel {
|
|||
ort.ReleaseTensorTypeAndShapeInfo(tensor_info);
|
||||
auto xshape = tensor_shape.data();
|
||||
auto xdim = tensor_shape.size();
|
||||
mkldnn::memory::dims dims(xdim);
|
||||
dnnl::memory::dims dims(xdim);
|
||||
|
||||
x_shape1 = TensorShape(xshape, xdim);
|
||||
mkldnn::memory::dims src_dims(
|
||||
dnnl::memory::dims src_dims(
|
||||
x_shape1.GetDims().begin(), x_shape1.GetDims().end());
|
||||
auto mpd = mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
auto src_memory = mkldnn::memory(mpd, cpu_engine, nullptr);
|
||||
auto mpd = dnnl::memory::desc({src_dims}, DnnnType<T>(), ort_source_format_);
|
||||
auto src_memory = dnnl::memory(mpd, cpu_engine, nullptr);
|
||||
srcs_pd_.push_back(mpd);
|
||||
srcs_memory_.push_back(src_memory);
|
||||
coeff.push_back(1.0);
|
||||
} else {
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
auto mpd = mkldnn::memory::desc(parents_[i].get()->primitive_dst_desc_);
|
||||
auto mpd = dnnl::memory::desc(parents_[i].get()->primitive_dst_desc_);
|
||||
auto src_memory = *parents_[i].get()->primitive_dst_mem_;
|
||||
srcs_pd_.push_back(mpd);
|
||||
srcs_memory_.push_back(src_memory);
|
||||
|
@ -89,44 +89,44 @@ class MklDnnSum : public MklDnnKernel {
|
|||
}
|
||||
}
|
||||
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
sum_pd_ = onnxruntime::make_unique<mkldnn::sum::primitive_desc>(
|
||||
mkldnn::sum::primitive_desc(*primitive_dst_md_, coeff, srcs_pd_, cpu_engine));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<dnnl::memory::desc>(
|
||||
dnnl::memory::desc({dst_dims_mkl}, DnnnType<T>(), dnnl::memory::format_tag::any));
|
||||
sum_pd_ = onnxruntime::make_unique<dnnl::sum::primitive_desc>(
|
||||
dnnl::sum::primitive_desc(*primitive_dst_md_, coeff, srcs_pd_, cpu_engine));
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder needed. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(sum_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// Intermediate node. Use Dnnl kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<dnnl::memory>(
|
||||
dnnl::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
}
|
||||
primitive_dst_desc_ = sum_pd_->dst_desc();
|
||||
|
||||
auto c = mkldnn::sum(*sum_pd_);
|
||||
auto c = dnnl::sum(*sum_pd_);
|
||||
net.push_back(c);
|
||||
std::unordered_map<int, mkldnn::memory> args{
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}};
|
||||
std::unordered_map<int, dnnl::memory> args{
|
||||
{DNNL_ARG_DST, *primitive_dst_mem_}};
|
||||
for (int i = 0; i < (int)num_inputs; i++) {
|
||||
args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, srcs_memory_[i]});
|
||||
args.insert({DNNL_ARG_MULTIPLE_SRC + i, srcs_memory_[i]});
|
||||
}
|
||||
net_args.push_back(args);
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
// reorder is necessary
|
||||
mkldnn::memory::data_type t = MklDnnType<T>();
|
||||
dnnl::memory::data_type t = DnnnType<T>();
|
||||
InitDstReorderOutput(cpu_engine, t, net, net_args);
|
||||
}
|
||||
}
|
||||
|
@ -162,14 +162,14 @@ class MklDnnSum : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<mkldnn::memory::desc> src_md_;
|
||||
std::vector<mkldnn::memory::desc> src_mds_;
|
||||
std::vector<mkldnn::memory> srcs_memory_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_md_;
|
||||
std::vector<dnnl::memory::desc> src_mds_;
|
||||
std::vector<dnnl::memory> srcs_memory_;
|
||||
|
||||
std::vector<mkldnn::memory::desc> srcs_pd_;
|
||||
std::unique_ptr<mkldnn::memory::desc> src_mpd_;
|
||||
std::unique_ptr<mkldnn::memory::desc> dst_pd_;
|
||||
std::unique_ptr<mkldnn::sum::primitive_desc> sum_pd_;
|
||||
std::vector<dnnl::memory::desc> srcs_pd_;
|
||||
std::unique_ptr<dnnl::memory::desc> src_mpd_;
|
||||
std::unique_ptr<dnnl::memory::desc> dst_pd_;
|
||||
std::unique_ptr<dnnl::sum::primitive_desc> sum_pd_;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -10,9 +10,9 @@
|
|||
#include "core/graph/graph.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
namespace ort_dnnl {
|
||||
|
||||
struct MklDnnNode {
|
||||
struct DnnlNode {
|
||||
std::string name;
|
||||
int node_index = -1;
|
||||
int input_start_index = -1; // start index in inputs()
|
||||
|
@ -63,7 +63,7 @@ struct Subgraph {
|
|||
|
||||
std::string graph_name;
|
||||
std::string subgraph_id;
|
||||
std::vector<MklDnnNode> mkldnn_nodes;
|
||||
std::vector<DnnlNode> dnnl_nodes;
|
||||
};
|
||||
} // namespace mkl_dnn
|
||||
} // namespace ort_dnnl
|
||||
} // namespace onnxruntime
|
|
@ -0,0 +1 @@
|
|||
OrtSessionOptionsAppendExecutionProvider_Dnnl
|
|
@ -1,38 +0,0 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/providers/mkldnn/mkldnn_provider_factory.h"
|
||||
#include <atomic>
|
||||
#include "mkldnn_execution_provider.h"
|
||||
#include "core/session/abi_session_options_impl.h"
|
||||
|
||||
using namespace onnxruntime;
|
||||
|
||||
namespace onnxruntime {
|
||||
struct MkldnnProviderFactory : IExecutionProviderFactory {
|
||||
MkldnnProviderFactory(bool create_arena) : create_arena_(create_arena) {}
|
||||
~MkldnnProviderFactory() override {}
|
||||
|
||||
std::unique_ptr<IExecutionProvider> CreateProvider() override;
|
||||
|
||||
private:
|
||||
bool create_arena_;
|
||||
};
|
||||
|
||||
std::unique_ptr<IExecutionProvider> MkldnnProviderFactory::CreateProvider() {
|
||||
MKLDNNExecutionProviderInfo info;
|
||||
info.create_arena = create_arena_;
|
||||
return onnxruntime::make_unique<MKLDNNExecutionProvider>(info);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Mkldnn(int device_id) {
|
||||
return std::make_shared<onnxruntime::MkldnnProviderFactory>(device_id);
|
||||
//TODO: This is apparently a bug. The consructor parameter is create-arena-flag, not the device-id
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
||||
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Mkldnn, _In_ OrtSessionOptions* options, int use_arena) {
|
||||
options->provider_factories.push_back(onnxruntime::CreateExecutionProviderFactory_Mkldnn(use_arena));
|
||||
return nullptr;
|
||||
}
|
|
@ -1,61 +0,0 @@
|
|||
// Copyright(C) 2019 Intel Corporation
|
||||
// Licensed under the MIT License
|
||||
|
||||
#include "mkldnn_kernel.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace mkl_dnn {
|
||||
|
||||
void MklDnnKernel::InitDstReorderOutput(mkldnn::engine& cpu_engine,
|
||||
mkldnn::memory::data_type& data_type,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) {
|
||||
// Allocate dst buffer if reorder is necessary
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder to ONNXRuntime format
|
||||
mkldnn::memory::dims dst_dims_mkl(
|
||||
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
mkldnn::memory::desc dst_des = mkldnn::memory::desc(dst_dims_mkl,
|
||||
data_type, ort_source_format_);
|
||||
reorder_dst_mem_to_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(dst_des, cpu_engine));
|
||||
net.push_back(mkldnn::reorder(*primitive_dst_mem_, *reorder_dst_mem_to_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *primitive_dst_mem_},
|
||||
{MKLDNN_ARG_TO, *reorder_dst_mem_to_}});
|
||||
}
|
||||
}
|
||||
|
||||
mkldnn::memory::format_tag MklDnnKernel::GetSourceFormat(int dim_size) {
|
||||
mkldnn::memory::format_tag source_format = mkldnn::memory::format_tag::any;
|
||||
switch (dim_size) {
|
||||
case 1: {
|
||||
source_format = mkldnn::memory::format_tag::x;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
source_format = mkldnn::memory::format_tag::nc;
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
source_format = mkldnn::memory::format_tag::ntc;
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
source_format = mkldnn::memory::format_tag::nchw;
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
source_format = mkldnn::memory::format_tag::ncdhw;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
source_format = mkldnn::memory::format_tag::any;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return source_format;
|
||||
}
|
||||
|
||||
} // namespace mkl_dnn
|
||||
} // namespace onnxruntime
|
|
@ -1 +0,0 @@
|
|||
OrtSessionOptionsAppendExecutionProvider_Mkldnn
|
|
@ -27,11 +27,11 @@
|
|||
#define BACKEND_OPENMP ""
|
||||
#endif
|
||||
|
||||
#if USE_MKLDNN
|
||||
#define BACKEND_MKLDNN "-MKL-DNN"
|
||||
#include "core/providers/mkldnn/mkldnn_execution_provider.h"
|
||||
#if USE_DNNL
|
||||
#define BACKEND_DNNL "-DNNL"
|
||||
#include "core/providers/dnnl/dnnl_execution_provider.h"
|
||||
#else
|
||||
#define BACKEND_MKLDNN ""
|
||||
#define BACKEND_DNNL ""
|
||||
#endif
|
||||
|
||||
#if USE_MKLML
|
||||
|
@ -78,7 +78,7 @@
|
|||
#define BACKEND_OPENBLAS ""
|
||||
#endif
|
||||
|
||||
#define BACKEND_DEVICE BACKEND_PROC BACKEND_MKLDNN BACKEND_MKLML BACKEND_NGRAPH BACKEND_OPENVINO BACKEND_NUPHAR BACKEND_OPENBLAS
|
||||
#define BACKEND_DEVICE BACKEND_PROC BACKEND_DNNL BACKEND_MKLML BACKEND_NGRAPH BACKEND_OPENVINO BACKEND_NUPHAR BACKEND_OPENBLAS
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
#include "core/providers/providers.h"
|
||||
#include "core/providers/cpu/cpu_execution_provider.h"
|
||||
|
@ -90,8 +90,8 @@
|
|||
#ifdef USE_TENSORRT
|
||||
#include "core/providers/tensorrt/tensorrt_provider_factory.h"
|
||||
#endif
|
||||
#ifdef USE_MKLDNN
|
||||
#include "core/providers/mkldnn/mkldnn_provider_factory.h"
|
||||
#ifdef USE_DNNL
|
||||
#include "core/providers/dnnl/dnnl_provider_factory.h"
|
||||
#endif
|
||||
#ifdef USE_NGRAPH
|
||||
#include "core/providers/ngraph/ngraph_provider_factory.h"
|
||||
|
@ -111,7 +111,7 @@ namespace onnxruntime {
|
|||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CPU(int use_arena);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CUDA(int device_id);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Mkldnn(int use_arena);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_NGraph(const char* ng_backend_type);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const char* device);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
|
||||
|
@ -256,7 +256,7 @@ inline void RegisterExecutionProvider(InferenceSession* sess, onnxruntime::IExec
|
|||
|
||||
// ordered by default priority. highest to lowest.
|
||||
const std::vector<std::string>& GetAllProviders() {
|
||||
static std::vector<std::string> all_providers = {kTensorrtExecutionProvider, kCudaExecutionProvider, kMklDnnExecutionProvider,
|
||||
static std::vector<std::string> all_providers = {kTensorrtExecutionProvider, kCudaExecutionProvider, kDnnlExecutionProvider,
|
||||
kNGraphExecutionProvider, kOpenVINOExecutionProvider, kNupharExecutionProvider,
|
||||
kBrainSliceExecutionProvider, kCpuExecutionProvider};
|
||||
return all_providers;
|
||||
|
@ -271,8 +271,8 @@ const std::vector<std::string>& GetAvailableProviders() {
|
|||
#ifdef USE_CUDA
|
||||
available_providers.push_back(kCudaExecutionProvider);
|
||||
#endif
|
||||
#ifdef USE_MKLDNN
|
||||
available_providers.push_back(kMklDnnExecutionProvider);
|
||||
#ifdef USE_DNNL
|
||||
available_providers.push_back(kDnnlExecutionProvider);
|
||||
#endif
|
||||
#ifdef USE_NGRAPH
|
||||
available_providers.push_back(kNGraphExecutionProvider);
|
||||
|
@ -305,9 +305,9 @@ void RegisterExecutionProviders(InferenceSession* sess, const std::vector<std::s
|
|||
// device id??
|
||||
RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_CUDA(0));
|
||||
#endif
|
||||
} else if (type == kMklDnnExecutionProvider) {
|
||||
#ifdef USE_MKLDNN
|
||||
RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Mkldnn(sess->GetSessionOptions().enable_cpu_mem_arena));
|
||||
} else if (type == kDnnlExecutionProvider) {
|
||||
#ifdef USE_DNNL
|
||||
RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Dnnl(sess->GetSessionOptions().enable_cpu_mem_arena));
|
||||
#endif
|
||||
} else if (type == kNGraphExecutionProvider) {
|
||||
#if USE_NGRAPH
|
||||
|
@ -382,7 +382,7 @@ void addGlobalMethods(py::module& m) {
|
|||
"get_all_opkernel_def", []() -> const std::vector<onnxruntime::KernelDef> {
|
||||
std::vector<onnxruntime::KernelDef> result;
|
||||
|
||||
// default logger is needed to create the MklDNNExecutionProvider
|
||||
// default logger is needed to create the DNNLExecutionProvider
|
||||
std::string default_logger_id{"DefaultLogger"};
|
||||
std::unique_ptr<onnxruntime::logging::LoggingManager> default_logging_manager =
|
||||
onnxruntime::make_unique<LoggingManager>(
|
||||
|
@ -398,8 +398,8 @@ void addGlobalMethods(py::module& m) {
|
|||
#ifdef USE_CUDA
|
||||
onnxruntime::CreateExecutionProviderFactory_CUDA(0),
|
||||
#endif
|
||||
#ifdef USE_MKLDNN
|
||||
onnxruntime::CreateExecutionProviderFactory_Mkldnn(1),
|
||||
#ifdef USE_DNNL
|
||||
onnxruntime::CreateExecutionProviderFactory_Dnnl(1),
|
||||
#endif
|
||||
#ifdef USE_NGRAPH
|
||||
onnxruntime::CreateExecutionProviderFactory_NGraph("CPU"),
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
#include "environment.h"
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
|
||||
#ifdef USE_MKLDNN
|
||||
#ifdef USE_DNNL
|
||||
|
||||
#include "core/providers/mkldnn/mkldnn_provider_factory.h"
|
||||
#include "core/providers/dnnl/dnnl_provider_factory.h"
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -67,8 +67,8 @@ ServerEnvironment::ServerEnvironment(OrtLoggingLevel severity, spdlog::sinks_ini
}

void ServerEnvironment::RegisterExecutionProviders(){
#ifdef USE_MKLDNN
  Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Mkldnn(options_, 1));
#ifdef USE_DNNL
  Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Dnnl(options_, 1));
#endif

#ifdef USE_NGRAPH
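The same registration call used by the server above is the public entry point any application would use after this rename. The following is a hedged, minimal sketch, not code from the commit: the include paths are the ones used inside the repository tree, the model path is a placeholder (use a wide-character path on Windows), and the snippet assumes ONNX Runtime was built with --use_dnnl so that USE_DNNL is defined.

```cpp
// Hypothetical application-side registration of the DNNL execution provider.
#include "core/session/onnxruntime_cxx_api.h"
#include "core/providers/dnnl/dnnl_provider_factory.h"

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "dnnl-example");
  Ort::SessionOptions session_options;

  // Second argument 1 enables the CPU memory arena, matching the server default above.
  Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Dnnl(session_options, 1));

  Ort::Session session(env, "model.onnx", session_options);  // placeholder model path
  return 0;
}
```

Passing 0 instead of 1 disables the arena, which is the toggle the test tools below expose via their enable_cpu_mem_arena flags.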
@ -4,7 +4,7 @@ Options:
-c [runs]: Specifies the number of Session::Run() to invoke simultaneously for each model.
-n [test_case_name]: Specifies a single test case to run.
-p [PLANNER_TYPE]: PLANNER_TYPE could be 'seq' or 'simple'. Default: 'simple'.
-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'mkldnn', 'tensorrt', 'ngraph', 'nuphar' or 'acl'. Default: 'cpu'.
-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'dnnl', 'tensorrt', 'ngraph', 'nuphar' or 'acl'. Default: 'cpu'.
-h: help

The debug version of this program depends on dbghelp.dll. Please make sure it's in your PATH.

@ -36,7 +36,7 @@ void usage() {
|
|||
"\t-r [repeat]: Specifies the number of times to repeat\n"
|
||||
"\t-v: verbose\n"
|
||||
"\t-n [test_case_name]: Specifies a single test case to run.\n"
|
||||
"\t-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'mkldnn', 'tensorrt', 'ngraph', "
|
||||
"\t-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'dnnl', 'tensorrt', 'ngraph', "
|
||||
"'openvino', 'nuphar' or 'acl'. "
|
||||
"Default: 'cpu'.\n"
|
||||
"\t-x: Use parallel executor, default (without -x): sequential executor.\n"
|
||||
|
@ -92,7 +92,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
int repeat_count = 1;
|
||||
int p_models = GetNumCpuCores();
|
||||
bool enable_cuda = false;
|
||||
bool enable_mkl = false;
|
||||
bool enable_dnnl = false;
|
||||
bool enable_ngraph = false;
|
||||
bool enable_nuphar = false;
|
||||
bool enable_tensorrt = false;
|
||||
|
@ -151,8 +151,8 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
// do nothing
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("cuda"))) {
|
||||
enable_cuda = true;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("mkldnn"))) {
|
||||
enable_mkl = true;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("dnnl"))) {
|
||||
enable_dnnl = true;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("ngraph"))) {
|
||||
enable_ngraph = true;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("nuphar"))) {
|
||||
|
@ -304,11 +304,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
return -1;
|
||||
#endif
|
||||
}
|
||||
if (enable_mkl) {
|
||||
#ifdef USE_MKLDNN
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Mkldnn(sf, enable_cpu_mem_arena ? 1 : 0));
|
||||
if (enable_dnnl) {
|
||||
#ifdef USE_DNNL
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Dnnl(sf, enable_cpu_mem_arena ? 1 : 0));
|
||||
#else
|
||||
fprintf(stderr, "MKL-DNN is not supported in this build");
|
||||
fprintf(stderr, "DNNL is not supported in this build");
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
@ -356,7 +356,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
|
||||
static const char* cuda_flaky_tests[] = {"fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"};
|
||||
static const char* dml_disabled_tests[] = {"mlperf_ssd_resnet34_1200", "mlperf_ssd_mobilenet_300", "mask_rcnn_keras", "mask_rcnn", "faster_rcnn"};
|
||||
|
||||
static const char* dnnl_disabled_tests[] = {"test_densenet121", "test_resnet18v2", "test_resnet34v2", "test_resnet50v2", "test_resnet101v2",
|
||||
"test_resnet101v2", "test_vgg19", "tf_inception_resnet_v2", "tf_inception_v1", "tf_inception_v3", "tf_inception_v4", "tf_mobilenet_v1_1.0_224",
|
||||
"tf_mobilenet_v2_1.0_224", "tf_mobilenet_v2_1.4_224", "tf_nasnet_large", "tf_pnasnet_large", "tf_resnet_v1_50", "tf_resnet_v1_101", "tf_resnet_v1_101",
|
||||
"tf_resnet_v2_101", "tf_resnet_v2_152"};
|
||||
|
||||
std::unordered_set<std::string> all_disabled_tests;
|
||||
if (enable_cuda) {
|
||||
all_disabled_tests.insert(std::begin(cuda_flaky_tests), std::end(cuda_flaky_tests));
|
||||
|
@ -364,6 +368,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
if (enable_dml) {
|
||||
all_disabled_tests.insert(std::begin(dml_disabled_tests), std::end(dml_disabled_tests));
|
||||
}
|
||||
if (enable_dnnl) {
|
||||
// these models run, but the tests are disabled to keep memory utilization low
|
||||
// This will be removed after LRU implementation
|
||||
all_disabled_tests.insert(std::begin(dnnl_disabled_tests), std::end(dnnl_disabled_tests));
|
||||
}
|
||||
#if defined(__amd64__) || defined(_M_AMD64)
|
||||
#else
|
||||
//out of memory
|
||||
|
@ -465,14 +474,18 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
broken_tests.insert({"candy", "Results mismatch: 2 of 150528"});
|
||||
#endif
|
||||
|
||||
#ifdef USE_MKLDNN
|
||||
#ifdef USE_DNNL
|
||||
broken_tests.insert({"tf_mobilenet_v2_1.0_224", "result mismatch"});
|
||||
broken_tests.insert({"tf_mobilenet_v2_1.4_224", "result mismatch"});
|
||||
broken_tests.insert({"tf_mobilenet_v1_1.0_224", "result mismatch"});
|
||||
broken_tests.insert({"mobilenetv2-1.0", "result mismatch"});
|
||||
broken_tests.insert({"candy", "result mismatch"});
|
||||
broken_tests.insert({"range_float_type_positive_delta_expanded", "get unknown exception from MKLDNN EP"});
|
||||
broken_tests.insert({"range_int32_type_negative_delta_expanded", "get unknown exception from MKLDNN EP"});
|
||||
broken_tests.insert({"range_float_type_positive_delta_expanded", "get unknown exception from DNNL EP"});
|
||||
broken_tests.insert({"range_int32_type_negative_delta_expanded", "get unknown exception from DNNL EP"});
|
||||
broken_tests.insert({"averagepool_2d_ceil", "maxpool ceiling not supported"});
|
||||
broken_tests.insert({"maxpool_2d_ceil", "maxpool ceiling not supported"});
|
||||
broken_tests.insert({"maxpool_2d_dilations", "maxpool dilations not supported"});
|
||||
broken_tests.insert({"mlperf_ssd_resnet34_1200", "test pass on dev box but fails on CI build"});
|
||||
#endif
|
||||
|
||||
#ifdef USE_OPENVINO
|
||||
|
|
|
@ -14,7 +14,7 @@ Options:

-c: [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.

-e: [cpu|cuda|mkldnn|tensorrt|ngraph|openvino|nuphar|acl]: Specifies the execution provider 'cpu','cuda','mkldnn','tensorrt', 'ngraph', 'openvino', 'nuphar' or 'acl'. Default is 'cpu'.
-e: [cpu|cuda|dnnl|tensorrt|ngraph|openvino|nuphar|acl]: Specifies the execution provider 'cpu','cuda','dnnl','tensorrt', 'ngraph', 'openvino', 'nuphar' or 'acl'. Default is 'cpu'.

-m: [test_mode]: Specifies the test mode. Value could be 'duration' or 'times'. Provide 'duration' to run the test for a fixed duration, and 'times' to repeat it a certain number of times. Default:'duration'.

|
|
@ -32,7 +32,7 @@ namespace perftest {
|
|||
"\t-M: Disable memory pattern.\n"
|
||||
"\t-A: Disable memory arena\n"
|
||||
"\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n"
|
||||
"\t-e [cpu|cuda|mkldnn|tensorrt|ngraph|openvino|nuphar|dml|acl]: Specifies the provider 'cpu','cuda','mkldnn','tensorrt', "
|
||||
"\t-e [cpu|cuda|dnnl|tensorrt|ngraph|openvino|nuphar|dml|acl]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', "
|
||||
"'ngraph', 'openvino', 'nuphar', 'dml' or 'acl'. "
|
||||
"Default:'cpu'.\n"
|
||||
"\t-b [tf|ort]: backend to use. Default:ort\n"
|
||||
|
@ -41,7 +41,7 @@ namespace perftest {
|
|||
"\t-p [profile_file]: Specifies the profile name to enable profiling and dump the profile data to the file.\n"
|
||||
"\t-s: Show statistics result, like P75, P90.\n"
|
||||
"\t-v: Show verbose information.\n"
|
||||
"\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0. If OpenMP is enabled, this configuration will be ignored.\n"
|
||||
"\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0.\n"
|
||||
"\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. Must >=0.\n"
|
||||
"\t-P: Use parallel executor instead of sequential executor.\n"
|
||||
"\t-o [optimization level]: Default is 1. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n"
|
||||
|
@ -80,8 +80,8 @@ namespace perftest {
|
|||
test_config.machine_config.provider_type_name = onnxruntime::kCpuExecutionProvider;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("cuda"))) {
|
||||
test_config.machine_config.provider_type_name = onnxruntime::kCudaExecutionProvider;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("mkldnn"))) {
|
||||
test_config.machine_config.provider_type_name = onnxruntime::kMklDnnExecutionProvider;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("dnnl"))) {
|
||||
test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("ngraph"))) {
|
||||
test_config.machine_config.provider_type_name = onnxruntime::kNGraphExecutionProvider;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("brainslice"))) {
|
||||
|
|
|
@ -30,11 +30,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
: rand_engine_(rd()), input_names_(m->GetInputCount()), input_length_(m->GetInputCount()) {
|
||||
Ort::SessionOptions session_options;
|
||||
const std::string& provider_name = performance_test_config.machine_config.provider_type_name;
|
||||
if (provider_name == onnxruntime::kMklDnnExecutionProvider) {
|
||||
#ifdef USE_MKLDNN
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Mkldnn(session_options, performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0));
|
||||
if (provider_name == onnxruntime::kDnnlExecutionProvider) {
|
||||
#ifdef USE_DNNL
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Dnnl(session_options, performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0));
|
||||
#else
|
||||
ORT_THROW("MKL-DNN is not supported in this build\n");
|
||||
ORT_THROW("DNNL is not supported in this build\n");
|
||||
#endif
|
||||
} else if (provider_name == onnxruntime::kNGraphExecutionProvider) {
|
||||
#ifdef USE_NGRAPH
|
||||
|
|
|
@ -216,7 +216,7 @@ bool PerformanceRunner::Initialize() {
|
|||
|
||||
test_case_.reset(CreateOnnxTestCase(narrow_model_name, test_model_info_, 0.0, 0.0));
|
||||
|
||||
// TODO: Place input tensor on cpu memory if mkldnn provider type to avoid CopyTensor logic in CopyInputAcrossDevices
|
||||
// TODO: Place input tensor on cpu memory if dnnl provider type to avoid CopyTensor logic in CopyInputAcrossDevices
|
||||
size_t test_data_count = test_case_->GetDataCount();
|
||||
if (test_data_count == 0) {
|
||||
std::cout << "there is no test data for model " << test_case_->GetTestCaseName() << std::endl;
|
||||
|
|
|
@ -235,7 +235,7 @@ TEST(GemmOpTest, GemmEmptyTensor) {
|
|||
test.AddInput<float>("C", {3}, std::vector<float>(3, 1.0f));
|
||||
test.AddOutput<float>("Y", {0, 3},
|
||||
{});
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kMklDnnExecutionProvider}); //TensorRT: doesn't support dynamic shape yet
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kDnnlExecutionProvider}); //TensorRT: doesn't support dynamic shape yet
|
||||
}
|
||||
|
||||
TEST(GemmOpTest, GemmNoBiasOpset11) {
|
||||
|
|
|
@ -162,7 +162,7 @@ static void MaxPool_8_WithIndexTest(bool has_index, int64_t storage_order = 0) {
|
|||
storage_order == 0 ? test.AddOutput<int64_t>("Indices", expected_dims, expected_indices_row)
|
||||
: test.AddOutput<int64_t>("Indices", expected_dims, expected_indices_col);
|
||||
}
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kMklDnnExecutionProvider, kTensorrtExecutionProvider, kAclExecutionProvider});
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kDnnlExecutionProvider, kTensorrtExecutionProvider, kAclExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(PoolTest, MaxPool_8_With_Index) {
|
||||
|
|
|
@ -95,7 +95,7 @@ TEST(GatherOpTest, Gather_invalid_index_gpu) {
|
|||
0.0f, 0.0f, 0.0f, 0.0f});
|
||||
|
||||
//On GPU, just set the value to 0 instead of report error. exclude all other providers
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider, kMklDnnExecutionProvider, kNupharExecutionProvider, kTensorrtExecutionProvider});
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider, kDnnlExecutionProvider, kNupharExecutionProvider, kTensorrtExecutionProvider});
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -460,7 +460,7 @@ void OpTester::ExecuteModel(Model& model, InferenceSession& session_object, Expe
|
|||
} else {
|
||||
if (expect_result == ExpectResult::kExpectFailure) {
|
||||
// Disable expected_failure_string checks for DNNL and nGraph EPs
|
||||
if (provider_type != kMklDnnExecutionProvider && provider_type != kNGraphExecutionProvider) {
|
||||
if (provider_type != kDnnlExecutionProvider && provider_type != kNGraphExecutionProvider) {
|
||||
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
|
||||
}
|
||||
} else {
|
||||
|
@ -569,7 +569,7 @@ void OpTester::Run(SessionOptions so, // Take the SessionOptions by value (i.e.
static const std::string all_provider_types[] = {
kCpuExecutionProvider,
kCudaExecutionProvider,
kMklDnnExecutionProvider,
kDnnlExecutionProvider,
kNGraphExecutionProvider,
kNupharExecutionProvider,
kBrainSliceExecutionProvider,

@ -622,8 +622,8 @@ void OpTester::Run(SessionOptions so, // Take the SessionOptions by value (i.e.
execution_provider = DefaultCpuExecutionProvider();
else if (provider_type == onnxruntime::kCudaExecutionProvider)
execution_provider = DefaultCudaExecutionProvider();
else if (provider_type == onnxruntime::kMklDnnExecutionProvider)
execution_provider = DefaultMkldnnExecutionProvider();
else if (provider_type == onnxruntime::kDnnlExecutionProvider)
execution_provider = DefaultDnnlExecutionProvider();
else if (provider_type == onnxruntime::kNGraphExecutionProvider)
execution_provider = DefaultNGraphExecutionProvider();
else if (provider_type == onnxruntime::kNupharExecutionProvider)

@ -139,9 +139,12 @@ def create_backend_test(testname=None):
'test_edge_pad_cpu',
'test_reflect_pad_cpu']

if c2.supports_device('MKL-DNN'):
if c2.supports_device('DNNL'):
current_failing_tests += ['^test_range_float_type_positive_delta_expanded_cpu',
'^test_range_int32_type_negative_delta_expanded_cpu']
'^test_range_int32_type_negative_delta_expanded_cpu',
'^test_averagepool_2d_ceil_cpu',
'^test_maxpool_2d_ceil_cpu',
'^test_maxpool_2d_dilations_cpu']

if c2.supports_device('OPENVINO_GPU_FP32') or c2.supports_device('OPENVINO_GPU_FP16'):
current_failing_tests.append('^test_div_cpu*')

@ -75,9 +75,9 @@ void TestInference(Ort::Env& env, T model_uri,
return;
#endif
} else if (provider_type == 2) {
#ifdef USE_MKLDNN
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Mkldnn(session_options, 1));
std::cout << "Running simple inference with mkldnn provider" << std::endl;
#ifdef USE_DNNL
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Dnnl(session_options, 1));
std::cout << "Running simple inference with dnnl provider" << std::endl;
#else
return;
#endif

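Outside the test harness, the same renamed C API entry point is what an application would call to opt into the provider. A minimal sketch, assuming a build configured with `--use_dnnl`; the header name, log id, and model path are illustrative placeholders, not part of this commit:

```cpp
#include <iostream>
#include "onnxruntime_cxx_api.h"
#ifdef USE_DNNL
#include "core/providers/dnnl/dnnl_provider_factory.h"
#endif

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "dnnl_example");
  Ort::SessionOptions session_options;
#ifdef USE_DNNL
  // Append the DNNL execution provider (formerly Mkldnn); 1 enables the CPU memory arena.
  Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Dnnl(session_options, 1));
#endif
  // Nodes not claimed by DNNL fall back to the default CPU provider.
  Ort::Session session(env, "model.onnx", session_options);  // placeholder path (wide string on Windows)
  std::cout << "Session created" << std::endl;
  return 0;
}
```
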
@ -10,7 +10,7 @@ namespace onnxruntime {

std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CPU(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CUDA(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Mkldnn(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_NGraph(const char* ng_backend_type);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_BrainSlice(uint32_t ip, int, int, bool, const char*, const char*, const char*);

@ -49,9 +49,9 @@ std::unique_ptr<IExecutionProvider> DefaultCudaExecutionProvider() {
#endif
}

std::unique_ptr<IExecutionProvider> DefaultMkldnnExecutionProvider(bool enable_arena) {
#ifdef USE_MKLDNN
return CreateExecutionProviderFactory_Mkldnn(enable_arena ? 1 : 0)->CreateProvider();
std::unique_ptr<IExecutionProvider> DefaultDnnlExecutionProvider(bool enable_arena) {
#ifdef USE_DNNL
return CreateExecutionProviderFactory_Dnnl(enable_arena ? 1 : 0)->CreateProvider();
#else
ORT_UNUSED_PARAMETER(enable_arena);
return nullptr;

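The test-only factory keeps its earlier contract: it returns a provider when the build defines `USE_DNNL` and `nullptr` otherwise. A small usage sketch under that assumption (the surrounding test body and the `execution_providers` container are hypothetical):

```cpp
// Sketch: skip a DNNL-specific check when the build does not include the provider.
std::unique_ptr<IExecutionProvider> dnnl_ep = DefaultDnnlExecutionProvider(/*enable_arena=*/true);
if (dnnl_ep == nullptr) {
  return;  // built without USE_DNNL
}
execution_providers.push_back(std::move(dnnl_ep));  // hypothetical container of providers under test
```
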
@ -9,7 +9,7 @@ namespace test {
// unique_ptr providers with default values for session registration
std::unique_ptr<IExecutionProvider> DefaultCpuExecutionProvider(bool enable_arena = true);
std::unique_ptr<IExecutionProvider> DefaultCudaExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultMkldnnExecutionProvider(bool enable_arena = true);
std::unique_ptr<IExecutionProvider> DefaultDnnlExecutionProvider(bool enable_arena = true);
std::unique_ptr<IExecutionProvider> DefaultNGraphExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultNupharExecutionProvider(bool allow_unaligned_buffers = true);
std::unique_ptr<IExecutionProvider> DefaultBrainSliceExecutionProvider();

@ -7,8 +7,8 @@
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_provider_factory.h"
#endif
#ifdef USE_MKLDNN
#include "core/providers/mkldnn/mkldnn_provider_factory.h"
#ifdef USE_DNNL
#include "core/providers/dnnl/dnnl_provider_factory.h"
#endif
#ifdef USE_NGRAPH
#include "core/providers/ngraph/ngraph_provider_factory.h"

@ -16,7 +16,7 @@ endif()
option(onnxruntime_USE_CUDA "Build with CUDA support" OFF)
option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF)
option(onnxruntime_USE_NNAPI "Build with DNNLibrary for Android NNAPI support" OFF)
option(onnxruntime_USE_MKLDNN "Build with MKL-DNN support" OFF)
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
option(onnxruntime_USE_NGRAPH "Build with nGraph support" OFF)
option(onnxruntime_USE_NUPHAR "Build with Nuphar" OFF)
option(onnxruntime_USE_BRAINSLICE "Build with BrainSlice" OFF)

@ -64,8 +64,8 @@ endif()
if(onnxruntime_USE_NNAPI)
add_definitions(-DUSE_NNAPI)
endif()
if(onnxruntime_USE_MKLDNN)
add_definitions(-DUSE_MKLDNN)
if(onnxruntime_USE_DNNL)
add_definitions(-DUSE_DNNL)
endif()
if(onnxruntime_USE_NGRAPH)
add_definitions(-DUSE_NGRAPH)

@ -7,8 +7,8 @@
#ifdef USE_CUDA
#include "onnxruntime/core/providers/cuda/cuda_provider_factory.h"
#endif
#ifdef USE_MKLDNN
#include "onnxruntime/core/providers/mkldnn/mkldnn_provider_factory.h"
#ifdef USE_DNNL
#include "onnxruntime/core/providers/dnnl/dnnl_provider_factory.h"
#endif
#ifdef USE_NGRAPH
#include "onnxruntime/core/providers/ngraph/ngraph_provider_factory.h"

8
setup.py
@ -117,9 +117,9 @@ except ImportError as error:

# Additional binaries
if platform.system() == 'Linux':
libs = ['onnxruntime_pybind11_state.so', 'libmkldnn.so.1', 'libmklml_intel.so', 'libiomp5.so', 'mimalloc.so']
libs = ['onnxruntime_pybind11_state.so', 'libdnnl.so.1', 'libmklml_intel.so', 'libiomp5.so', 'mimalloc.so']
# nGraph Libs
libs.extend(['libngraph.so', 'libcodegen.so', 'libcpu_backend.so', 'libmkldnn.so', 'libtbb_debug.so', 'libtbb_debug.so.2', 'libtbb.so', 'libtbb.so.2'])
libs.extend(['libngraph.so', 'libcodegen.so', 'libcpu_backend.so', 'libdnnl.so', 'libtbb_debug.so', 'libtbb_debug.so.2', 'libtbb.so', 'libtbb.so.2'])
# Nuphar Libs
libs.extend(['libtvm.so.0.5.1'])
# Openvino Libs

@ -127,11 +127,11 @@ if platform.system() == 'Linux':
if nightly_build:
libs.extend(['libonnxruntime_pywrapper.so'])
elif platform.system() == "Darwin":
libs = ['onnxruntime_pybind11_state.so', 'libmkldnn.1.dylib', 'mimalloc.so'] # TODO add libmklml and libiomp5 later.
libs = ['onnxruntime_pybind11_state.so', 'libdnnl.1.dylib', 'mimalloc.so'] # TODO add libmklml and libiomp5 later.
if nightly_build:
libs.extend(['libonnxruntime_pywrapper.dylib'])
else:
libs = ['onnxruntime_pybind11_state.pyd', 'mkldnn.dll', 'mklml.dll', 'libiomp5md.dll']
libs = ['onnxruntime_pybind11_state.pyd', 'dnnl.dll', 'mklml.dll', 'libiomp5md.dll']
libs.extend(['ngraph.dll', 'cpu_backend.dll', 'tbb.dll', 'mimalloc-override.dll', 'mimalloc-redirect.dll', 'mimalloc-redirect32.dll'])
# Nuphar Libs
libs.extend(['tvm.dll'])

@ -124,7 +124,7 @@ Use the individual flags to only run the specified stages.
parser.add_argument("--use_jemalloc", action='store_true', help="Use jemalloc.")
parser.add_argument("--use_mimalloc", action='store_true', help="Use mimalloc.")
parser.add_argument("--use_openblas", action='store_true', help="Build with OpenBLAS.")
parser.add_argument("--use_mkldnn", action='store_true', help="Build with MKLDNN.")
parser.add_argument("--use_dnnl", action='store_true', help="Build with DNNL.")
parser.add_argument("--use_mklml", action='store_true', help="Build with MKLML.")
parser.add_argument("--use_gemmlowp", action='store_true', help="Build with gemmlowp for quantized gemm.")
parser.add_argument("--use_automl", action='store_true', help="Build with AutoML support.")

@ -295,7 +295,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
"-Donnxruntime_BUILD_SHARED_LIB=" + ("ON" if args.build_shared_lib or args.build_server else "OFF"),
"-Donnxruntime_USE_EIGEN_FOR_BLAS=" + ("OFF" if args.use_openblas else "ON"),
"-Donnxruntime_USE_OPENBLAS=" + ("ON" if args.use_openblas else "OFF"),
"-Donnxruntime_USE_MKLDNN=" + ("ON" if args.use_mkldnn else "OFF"),
"-Donnxruntime_USE_DNNL=" + ("ON" if args.use_dnnl else "OFF"),
"-Donnxruntime_USE_MKLML=" + ("ON" if args.use_mklml else "OFF"),
"-Donnxruntime_USE_GEMMLOWP=" + ("ON" if args.use_gemmlowp else "OFF"),
"-Donnxruntime_USE_NGRAPH=" + ("ON" if args.use_ngraph else "OFF"),

@ -620,8 +620,8 @@ def run_onnx_tests(build_dir, configs, onnx_test_data_dir, provider, enable_mult
run_subprocess([exe,'-x'] + cmd, cwd=cwd)


# mkldnn temporary function for running onnx tests and model tests separately.
def mkldnn_run_onnx_tests(build_dir, configs, onnx_test_data_dir):
# dnnl temporary function for running onnx tests and model tests separately.
def dnnl_run_onnx_tests(build_dir, configs, onnx_test_data_dir):
for config in configs:
cwd = get_config_build_dir(build_dir, config)
if is_windows():

@ -630,7 +630,7 @@ def mkldnn_run_onnx_tests(build_dir, configs, onnx_test_data_dir):
else:
exe = os.path.join(cwd, 'onnx_test_runner')
model_dir = os.path.join(build_dir, "models")
cmd_base = ['-e', 'mkldnn', '-c', '1', '-j', '1']
cmd_base = ['-e', 'dnnl', '-c', '1', '-j', '1']
if os.path.exists(onnx_test_data_dir):
onnxdata_cmd = cmd_base + [onnx_test_data_dir]
# /data/onnx

@ -818,7 +818,7 @@ def generate_documentation(source_dir, build_dir, configs):
print('git diff returned non-zero error code')
if len(docdiff) > 0:
# Show warning instead of throwing exception, because it is dependent on build configuration for including execution propviders
log.warning('The updated opkernel document file '+str(opkernel_doc_path)+' is different from the checked in version. Consider regenrating the file with CPU, MKLDNN and CUDA providers enabled.')
log.warning('The updated opkernel document file '+str(opkernel_doc_path)+' is different from the checked in version. Consider regenrating the file with CPU, DNNL and CUDA providers enabled.')
log.debug('diff:\n'+str(docdiff))

docdiff = ''

@ -973,9 +973,9 @@ def main():
if args.use_dml:
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'dml', args.enable_multi_device_test, False, 1)

#It could run out of memory because of memory leak
#if args.use_mkldnn:
# mkldnn_run_onnx_tests(build_dir, configs, onnx_test_data_dir)
# Run some models are disabled to keep memory utilization under control
if args.use_dnnl:
dnnl_run_onnx_tests(build_dir, configs, onnx_test_data_dir)

run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, args.enable_multi_device_test, False,
1 if args.x86 or platform.system() == 'Darwin' else 0,

@ -3,7 +3,7 @@ jobs:
parameters:
AgentPool : 'Linux-CPU'
JobName: 'Linux_CI_Dev'
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d cpu -r $(Build.BinariesDirectory) -x "--use_mklml --use_llvm --use_nuphar --use_mkldnn --use_tvm --use_automl --build_wheel --enable_language_interop_ops"'
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d cpu -r $(Build.BinariesDirectory) -x "--use_mklml --use_llvm --use_nuphar --use_dnnl --use_tvm --use_automl --build_wheel --enable_language_interop_ops"'
DoNugetPack: 'false'
ArtifactName: 'drop-linux'
TimeoutInMinutes: 120

@ -30,7 +30,7 @@ jobs:
displayName: 'Generate cmake config'
inputs:
scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_automl --use_mkldnn --use_openmp --build_shared_lib --enable_onnx_tests'
arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_automl --use_dnnl --use_openmp --build_shared_lib --enable_onnx_tests'
workingDirectory: '$(Build.BinariesDirectory)'

- task: VSBuild@1

@ -112,7 +112,7 @@ jobs:
set /p WHEEL_FILENAME=<wheel_filename_file
del wheel_filename_file
python.exe -m pip install -q --upgrade %WHEEL_FILENAME%
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 16 2019" --use_mkldnn --build_wheel --enable_onnx_tests
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 16 2019" --use_dnnl --build_wheel --enable_onnx_tests

workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)'
displayName: 'Run tests'

@ -123,4 +123,4 @@ jobs:
condition: succeeded()

- template: templates/clean-agent-build-directory-step.yml

@ -5,7 +5,7 @@ jobs:
AgentDemands: 'Has19H1WinSDK'
DoDebugBuild: 'true'
DoCompliance: 'false'
BuildCommand: '$(Build.SourcesDirectory)\tools\ci_build\build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake\bin\ctest.exe --enable_pybind --use_mkldnn --use_dml --build_shared_lib --build_csharp --enable_onnx_tests --use_cuda --cuda_version=10.0 --cuda_home="C:\local\cuda_10.0.130_win10" --cudnn_home="C:\local\cudnn-10.0-windows10-x64-v7.3.1.20\cuda" --gen_doc'
BuildCommand: '$(Build.SourcesDirectory)\tools\ci_build\build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake\bin\ctest.exe --enable_pybind --use_dnnl --use_dml --build_shared_lib --build_csharp --enable_onnx_tests --use_cuda --cuda_version=10.0 --cuda_home="C:\local\cuda_10.0.130_win10" --cudnn_home="C:\local\cudnn-10.0-windows10-x64-v7.3.1.20\cuda" --gen_doc'
JobName: 'Windows_CI_GPU_Dev'
DoNugetPack: 'false'
NuPackScript : ''

@ -4,7 +4,7 @@ jobs:
AgentPool : 'Win-GPU-CUDA10'
DoDebugBuild: 'true'
DoCompliance: 'false'
BuildCommand: '$(Build.SourcesDirectory)\tools\ci_build\build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake\bin\ctest.exe --enable_pybind --use_mkldnn --use_tensorrt --tensorrt_home="C:\local\TensorRT-6.0.1.5" --build_shared_lib --build_csharp --enable_onnx_tests --use_cuda --cuda_version=10.0 --cuda_home="C:\local\cuda_10.0.130_win10_trt6015dll" --cudnn_home="C:\local\cudnn-10.0-windows10-x64-v7.3.1.20\cuda" --gen_doc'
BuildCommand: '$(Build.SourcesDirectory)\tools\ci_build\build.py --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --cmake_path $(Build.BinariesDirectory)\cmake\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake\bin\ctest.exe --enable_pybind --use_dnnl --use_tensorrt --tensorrt_home="C:\local\TensorRT-6.0.1.5" --build_shared_lib --build_csharp --enable_onnx_tests --use_cuda --cuda_version=10.0 --cuda_home="C:\local\cuda_10.0.130_win10_trt6015dll" --cudnn_home="C:\local\cudnn-10.0-windows10-x64-v7.3.1.20\cuda" --gen_doc'
JobName: 'Windows_CI_GPU_Dev'
DoNugetPack: 'false'
NuPackScript : ''