Merge branch 'main' into feature/cvt-model-opset

2024-03-27 10:08:37 -07:00 · 2024-03-27 10:08:37 -07:00 · 3118a44397
--- a/.pyproject/cmdclass.py
+++ b/.pyproject/cmdclass.py
@ -38,9 +38,10 @@ def _load_cuda_version():

 def _load_nvidia_smi():
    try:
-        output = subprocess.check_output(
+        outputs = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader,nounits"],
-            stderr=subprocess.STDOUT).decode("utf-8")
+            stderr=subprocess.STDOUT).decode("utf-8").splitlines()
+        output = outputs[0] if outputs else ""
        arch = output.strip().replace('.', '')
        return arch if arch.isdigit() else None
    except (subprocess.CalledProcessError, OSError):
--- a/cmake/ext_cuda.cmake
+++ b/cmake/ext_cuda.cmake
@ -1,6 +1,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.

+find_package(CUDAToolkit)
 enable_language(CUDA)

 set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
--- a/cmake/ext_tests.cmake
+++ b/cmake/ext_tests.cmake
@ -60,7 +60,7 @@ function(add_test_target)
                          gtest gmock)

    if(OCOS_USE_CUDA)
-      target_link_directories(${ARG_TARGET} PRIVATE $ENV{CUDA_PATH}/lib64)
+      target_link_directories(${ARG_TARGET} PRIVATE ${CUDAToolkit_LIBRARY_DIR})
    endif()

    set(test_data_destination_root_directory ${onnxruntime_extensions_BINARY_DIR})
--- a/docs/development.md
+++ b/docs/development.md
@ -15,9 +15,10 @@ The package contains all custom operators and some Python scripts to manipulate
  - use-cuda: enable CUDA kernel build in Python package.
  - no-azure: disable AzureOp kernel build in Python package.
  - no-opencv: disable operators based on OpenCV in build.
-  - cc-debug: Generate debug info for extensions binaries and disable C/C++ compiler optimization.
+  - cc-debug: generate debug info for extensions binaries and disable C/C++ compiler optimization.
+  - cuda-archs: specify the CUDA architectures (like 70, 85, etc.); multiple values can be combined with semicolons. The default value is the compute capability of GPU 0 as reported by the nvidia-smi utility.

-   For example:`pip install . --config-settings "ortx-user-option=use-cuda,cc-debug" `, This command builds CUDA kernels into the package and installs it, accompanied by the generation of debug information.
+  For example: `pip install . --config-settings "ortx-user-option=use-cuda,cc-debug"`. This command builds CUDA kernels into the package and installs it, and also generates debug information.

 Test:

@ -59,6 +60,9 @@ For any alternative scenarios, execute the following commands:

 The generated DLL or library is typically located in the `out/<OS>/<FLAVOR>` directory. To validate the build, utilize the unit tests available in the `test/test_static_test` and `test/shared_test` directories.

+**CUDA Build**  
+The CUDA build can be enabled with `-DOCOS_USE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=<arch>`.
+
 **VC Runtime static linkage**  
 If you want to build the binary with VC Runtime static linkage, please add a parameter _-DCMAKE_MSVC_RUNTIME_LIBRARY="MultiThreaded$<$<CONFIG:Debug>:Debug>"_ when running build.bat