From 66980e4646c96e3e8485bb3908808edeb76ff160 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 4 Nov 2024 16:30:50 -0800 Subject: [PATCH] Refactor the cmake code that is related to delay loading (#22646) ### Description Refactor the cmake code that is related to delay loading. Provide a cmake option to control whether delay loading is enabled. The option is disabled when Python is enabled, due to a known issue. ### Motivation and Context ONNX Runtime's Python package depends on DirectML.dll, but supposedly the DLL should be delay loaded. This PR only refactors the code. It doesn't change the behavior. --- cmake/CMakeLists.txt | 6 ++++- cmake/onnxruntime.cmake | 24 ++++++++++---------- cmake/onnxruntime_providers_dml.cmake | 5 ++-- cmake/onnxruntime_python.cmake | 21 +++++++++-------- cmake/target_delayload.cmake | 11 +++++---- tools/ci_build/github/linux/build_cuda_ci.sh | 2 +- 6 files changed, 39 insertions(+), 30 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3ca519144e6..8d9f08cee05 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -129,6 +129,10 @@ option(onnxruntime_DONT_VECTORIZE "Do not vectorize operations in Eigen" OFF) option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF) option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump debug information about node inputs and outputs when executing the model." OFF) cmake_dependent_option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB "Build dump debug information about node inputs and outputs with support for sql database." OFF "onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS" OFF) + +# When loading a delay loaded DLL, Windows searches the main EXE's folder first. +# In a Python process, it searches where python.exe lives, but it doesn't search the python package's installation folder. Therefore we cannot enable this flag when Python is enabled. 
+cmake_dependent_option(onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS "Delay load some of the dependent DLls that are part of the OS" ON "WIN32;NOT GDK_PLATFORM;NOT onnxruntime_ENABLE_PYTHON" OFF) option(onnxruntime_USE_DML "Build with DirectML support" OFF) option(onnxruntime_USE_MIGRAPHX "Build with AMDMIGraphX support" OFF) option(onnxruntime_USE_WINML "Build with WinML support" OFF) @@ -1363,7 +1367,7 @@ endif() #Adjust warning flags set_msvc_c_cpp_compiler_warning_level(4) -set(onnxruntime_DELAYLOAD_FLAGS "") +set(onnxruntime_DELAYLOAD_FLAGS ) include_directories( ${ONNXRUNTIME_INCLUDE_DIR} diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 9602e54f3bc..1278bb7dc9e 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -122,8 +122,8 @@ else() else() onnxruntime_add_shared_library(onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c ) endif() - if (onnxruntime_USE_CUDA) - set_property(TARGET onnxruntime APPEND_STRING PROPERTY LINK_FLAGS " -Xlinker -rpath=\\$ORIGIN") + if(NOT APPLE) + target_link_options(onnxruntime PRIVATE "LINKER:-rpath=\$ORIGIN") endif() endif() @@ -139,17 +139,17 @@ target_compile_definitions(onnxruntime PRIVATE FILE_NAME=\"onnxruntime.dll\") if(UNIX) if (APPLE) - set(ONNXRUNTIME_SO_LINK_FLAG " -Xlinker -dead_strip") + target_link_options(onnxruntime PRIVATE "LINKER:-dead_strip") elseif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") - set(ONNXRUNTIME_SO_LINK_FLAG " -Xlinker --version-script=${SYMBOL_FILE} -Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") + target_link_options(onnxruntime PRIVATE "LINKER:--version-script=${SYMBOL_FILE}" "LINKER:--no-undefined" "LINKER:--gc-sections") endif() else() - set(ONNXRUNTIME_SO_LINK_FLAG " -DEF:${SYMBOL_FILE}") + target_link_options(onnxruntime PRIVATE "-DEF:${SYMBOL_FILE}") endif() -if (NOT WIN32) - if (APPLE OR ${CMAKE_SYSTEM_NAME} MATCHES "^iOS") - set(ONNXRUNTIME_SO_LINK_FLAG " -Wl,-exported_symbols_list,${SYMBOL_FILE}") + +if (APPLE OR 
${CMAKE_SYSTEM_NAME} MATCHES "^iOS") + target_link_options(onnxruntime PRIVATE "LINKER:-exported_symbols_list,${SYMBOL_FILE}") if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS") set_target_properties(onnxruntime PROPERTIES MACOSX_RPATH TRUE @@ -159,12 +159,10 @@ if (NOT WIN32) else() set_target_properties(onnxruntime PROPERTIES INSTALL_RPATH "@loader_path") endif() - elseif (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath='$ORIGIN'") - endif() endif() + if(CMAKE_SYSTEM_NAME STREQUAL "Android" AND onnxruntime_MINIMAL_BUILD) # target onnxruntime is a shared library, the dummy __cxa_demangle is only attach to it to avoid # affecting downstream ort library users with the behavior of dummy __cxa_demangle. So the dummy @@ -248,7 +246,9 @@ target_link_libraries(onnxruntime PRIVATE ${onnxruntime_EXTERNAL_LIBRARIES} ) -set_property(TARGET onnxruntime APPEND_STRING PROPERTY LINK_FLAGS ${ONNXRUNTIME_SO_LINK_FLAG} ${onnxruntime_DELAYLOAD_FLAGS}) +if(WIN32) + target_link_options(onnxruntime PRIVATE ${onnxruntime_DELAYLOAD_FLAGS}) +endif() #See: https://cmake.org/cmake/help/latest/prop_tgt/SOVERSION.html if(NOT APPLE AND NOT WIN32) if(${CMAKE_SYSTEM_NAME} MATCHES "AIX") diff --git a/cmake/onnxruntime_providers_dml.cmake b/cmake/onnxruntime_providers_dml.cmake index 439be882dcc..3141aa85a11 100644 --- a/cmake/onnxruntime_providers_dml.cmake +++ b/cmake/onnxruntime_providers_dml.cmake @@ -61,8 +61,9 @@ target_link_libraries(onnxruntime_providers_dml PRIVATE delayimp.lib) - if (NOT GDK_PLATFORM) - set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:DirectML.dll /DELAYLOAD:d3d12.dll /DELAYLOAD:dxgi.dll /DELAYLOAD:dxcore.dll /DELAYLOAD:api-ms-win-core-com-l1-1-0.dll /DELAYLOAD:shlwapi.dll /DELAYLOAD:oleaut32.dll /DELAYLOAD:ext-ms-win-dxcore-l1-*.dll /ignore:4199") + if (onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS AND NOT GDK_PLATFORM) + #NOTE: the flags are only 
applied to onnxruntime.dll and the PYD file in our python package. Our C/C++ unit tests do not use these flags. + list(APPEND onnxruntime_DELAYLOAD_FLAGS "/DELAYLOAD:DirectML.dll" "/DELAYLOAD:d3d12.dll" "/DELAYLOAD:dxgi.dll" "/DELAYLOAD:dxcore.dll" "/DELAYLOAD:api-ms-win-core-com-l1-1-0.dll" "/DELAYLOAD:shlwapi.dll" "/DELAYLOAD:oleaut32.dll" "/DELAYLOAD:ext-ms-win-dxcore-l1-*.dll" "/ignore:4199") endif() target_compile_definitions(onnxruntime_providers_dml diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 0d038d210ea..7239b245a72 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -110,17 +110,17 @@ if (onnxruntime_USE_NCCL) endif() if(APPLE) - set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker -exported_symbols_list -Xlinker ${ONNXRUNTIME_ROOT}/python/exported_symbols.lst") + target_link_options(onnxruntime_pybind11_state PRIVATE "LINKER:-exported_symbols_list,${ONNXRUNTIME_ROOT}/python/exported_symbols.lst") elseif(UNIX) if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS) - set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script_expose_onnx_protobuf.lds -Xlinker --gc-sections") + target_link_options(onnxruntime_pybind11_state PRIVATE "LINKER:--version-script=${ONNXRUNTIME_ROOT}/python/version_script_expose_onnx_protobuf.lds" "LINKER:--gc-sections") else() if (NOT CMAKE_SYSTEM_NAME MATCHES "AIX") - set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script.lds -Xlinker --gc-sections") + target_link_options(onnxruntime_pybind11_state PRIVATE "LINKER:--version-script=${ONNXRUNTIME_ROOT}/python/version_script.lds" "LINKER:--gc-sections") endif() endif() else() - set(ONNXRUNTIME_SO_LINK_FLAG "-DEF:${ONNXRUNTIME_ROOT}/python/pybind.def") + target_link_options(onnxruntime_pybind11_state PRIVATE "-DEF:${ONNXRUNTIME_ROOT}/python/pybind.def") endif() if (onnxruntime_ENABLE_ATEN) @@ -199,11 +199,11 @@ 
set(onnxruntime_pybind11_state_dependencies ${onnxruntime_EXTERNAL_DEPENDENCIES} ${pybind11_dep} ) -set_property(TARGET onnxruntime_pybind11_state APPEND_STRING PROPERTY LINK_FLAGS ${ONNXRUNTIME_SO_LINK_FLAG} ${onnxruntime_DELAYLOAD_FLAGS}) + add_dependencies(onnxruntime_pybind11_state ${onnxruntime_pybind11_state_dependencies}) if (MSVC) - set_target_properties(onnxruntime_pybind11_state PROPERTIES LINK_FLAGS "${ONNXRUNTIME_SO_LINK_FLAG}") + target_link_options(onnxruntime_pybind11_state PRIVATE ${onnxruntime_DELAYLOAD_FLAGS}) # if MSVC, pybind11 undefines _DEBUG in pybind11/detail/common.h, which causes the pragma in pyconfig.h # from the python installation to require the release version of the lib # e.g. from a python 3.10 install: @@ -220,14 +220,15 @@ if (MSVC) # Explicitly use the release version of the python library to make the project file consistent with this. target_link_libraries(onnxruntime_pybind11_state PRIVATE ${Python_LIBRARY_RELEASE}) elseif (APPLE) - set_target_properties(onnxruntime_pybind11_state PROPERTIES LINK_FLAGS "${ONNXRUNTIME_SO_LINK_FLAG} -Xlinker -undefined -Xlinker dynamic_lookup") + # The following flag no longer works + #target_link_options(onnxruntime_pybind11_state PRIVATE "LINKER:-undefined,dynamic_lookup") set_target_properties(onnxruntime_pybind11_state PROPERTIES INSTALL_RPATH "@loader_path" BUILD_WITH_INSTALL_RPATH TRUE INSTALL_RPATH_USE_LINK_PATH FALSE) else() if (NOT CMAKE_SYSTEM_NAME MATCHES "AIX") - set_property(TARGET onnxruntime_pybind11_state APPEND_STRING PROPERTY LINK_FLAGS " -Xlinker -rpath=\\$ORIGIN") + target_link_options(onnxruntime_pybind11_state PRIVATE "LINKER:-rpath=\$ORIGIN") endif() endif() @@ -238,8 +239,8 @@ if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS) MATH(EXPR PROTOBUF_INDEX_NEXT "${PROTOBUF_INDEX} + 1") if (ONNX_INDEX GREATER_EQUAL 0 AND PROTOBUF_INDEX GREATER_EQUAL 0) # Expect protobuf to follow onnx due to dependence - list(INSERT onnxruntime_CUSTOM_EXTERNAL_LIBRARIES ${ONNX_INDEX} 
"-Wl,--no-as-needed") - list(INSERT onnxruntime_CUSTOM_EXTERNAL_LIBRARIES ${PROTOBUF_INDEX_NEXT} "-Wl,--as-needed") + list(INSERT onnxruntime_CUSTOM_EXTERNAL_LIBRARIES ${ONNX_INDEX} "LINKER:--no-as-needed") + list(INSERT onnxruntime_CUSTOM_EXTERNAL_LIBRARIES ${PROTOBUF_INDEX_NEXT} "LINKER:--as-needed") else() message(FATAL_ERROR "Required external libraries onnx and protobuf are not found in onnxruntime_EXTERNAL_LIBRARIES") endif() diff --git a/cmake/target_delayload.cmake b/cmake/target_delayload.cmake index 53f252a3e71..92273f54242 100644 --- a/cmake/target_delayload.cmake +++ b/cmake/target_delayload.cmake @@ -6,9 +6,12 @@ function(target_delayload target_name) if(NOT MSVC) message(SEND_ERROR "Delayloading is only supported in MSVC") endif() - foreach(lib ${ARGN}) - target_link_options(${target_name} PRIVATE /DELAYLOAD:"${lib}") - endforeach() + if(onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS) + foreach(lib ${ARGN}) + target_link_options(${target_name} PRIVATE /DELAYLOAD:"${lib}") + endforeach() - target_link_libraries(${target_name} PRIVATE delayimp.lib) + target_link_libraries(${target_name} PRIVATE delayimp.lib) + endif() endfunction() + diff --git a/tools/ci_build/github/linux/build_cuda_ci.sh b/tools/ci_build/github/linux/build_cuda_ci.sh index 6b155da0203..a78e2409983 100755 --- a/tools/ci_build/github/linux/build_cuda_ci.sh +++ b/tools/ci_build/github/linux/build_cuda_ci.sh @@ -33,7 +33,7 @@ fi if [ -x "$(command -v ccache)" ]; then ccache -s; - BUILD_ARGS+=("--use_cache") + #BUILD_ARGS+=("--use_cache") fi if [ -f /opt/python/cp312-cp312/bin/python3 ]; then /opt/python/cp312-cp312/bin/python3 tools/ci_build/build.py "${BUILD_ARGS[@]}"