diff --git a/.pyproject/cmdclass.py b/.pyproject/cmdclass.py index 04a69120..16855a6b 100644 --- a/.pyproject/cmdclass.py +++ b/.pyproject/cmdclass.py @@ -38,9 +38,10 @@ def _load_cuda_version(): def _load_nvidia_smi(): try: - output = subprocess.check_output( + outputs = subprocess.check_output( ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader,nounits"], - stderr=subprocess.STDOUT).decode("utf-8") + stderr=subprocess.STDOUT).decode("utf-8").splitlines() + output = outputs[0] if outputs else "" arch = output.strip().replace('.', '') return arch if arch.isdigit() else None except (subprocess.CalledProcessError, OSError): diff --git a/cmake/ext_cuda.cmake b/cmake/ext_cuda.cmake index ac48dcb8..fddd3272 100644 --- a/cmake/ext_cuda.cmake +++ b/cmake/ext_cuda.cmake @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +find_package(CUDAToolkit) enable_language(CUDA) set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) diff --git a/cmake/ext_tests.cmake b/cmake/ext_tests.cmake index fe710401..95d257dc 100644 --- a/cmake/ext_tests.cmake +++ b/cmake/ext_tests.cmake @@ -60,7 +60,7 @@ function(add_test_target) gtest gmock) if(OCOS_USE_CUDA) - target_link_directories(${ARG_TARGET} PRIVATE $ENV{CUDA_PATH}/lib64) + target_link_directories(${ARG_TARGET} PRIVATE ${CUDAToolkit_LIBRARY_DIR}) endif() set(test_data_destination_root_directory ${onnxruntime_extensions_BINARY_DIR}) diff --git a/docs/development.md b/docs/development.md index 86e8610b..42a4cbd5 100644 --- a/docs/development.md +++ b/docs/development.md @@ -15,9 +15,10 @@ The package contains all custom operators and some Python scripts to manipulate - use-cuda: enable CUDA kernel build in Python package. - no-azure: disable AzureOp kernel build in Python package. - no-opencv: disable operators based on OpenCV in build. - - cc-debug: Generate debug info for extensions binaries and disable C/C++ compiler optimization. 
+ - cc-debug: generate debug info for extensions binaries and disable C/C++ compiler optimization. + - cuda-archs: specify the CUDA architectures (like 70, 85, etc.); multiple values can be combined with semicolons. The default value is the nvidia-smi output for GPU-0. - For example:`pip install . --config-settings "ortx-user-option=use-cuda,cc-debug" `, This command builds CUDA kernels into the package and installs it, accompanied by the generation of debug information. + For example:`pip install . --config-settings "ortx-user-option=use-cuda,cc-debug" `, This command builds CUDA kernels into the package and installs it, accompanied by the generation of debug information. Test: @@ -59,6 +60,9 @@ For any alternative scenarios, execute the following commands: The generated DLL or library is typically located in the `out//` directory. To validate the build, utilize the unit tests available in the `test/test_static_test` and `test/shared_test` directories. +**CUDA Build** +The CUDA build can be enabled with -DOCOS_USE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=<arch> + **VC Runtime static linkage** If you want to build the binary with VC Runtime static linkage, please add a parameter _-DCMAKE_MSVC_RUNTIME_LIBRARY="MultiThreaded$<$:Debug>"_ when running build.bat