Enhancing CUDA Support in Python Package Build and Testing (#608)

* initial commit

* Add the cuda support for python package

* formt the code

* refine it a little bit
This commit is contained in:
Wenbing Li 2023-11-27 15:39:52 -08:00 коммит произвёл GitHub
Родитель 12ea73d365
Коммит fb2a8c2841
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 453 добавлений и 187 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -52,3 +52,4 @@ java/hs_*.log
*.pyd *.pyd
/test/data/ppp_vision/*.updated.onnx /test/data/ppp_vision/*.updated.onnx
/test/data/generated/ /test/data/generated/
/CMakeSettings.json

34
.pyproject/backend.py Normal file
Просмотреть файл

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
###########################################################################
import os
import sys
from setuptools import build_meta as _orig
from setuptools.build_meta import * # noqa: F403
# add the current directory to the path, so we can import cmdclass.py
sys.path.append(os.path.dirname(__file__))
import cmdclass as _cmds # noqa: E402
def build_wheel(wheel_directory, config_settings=None,
                metadata_directory=None):
    """Build-backend ``build_wheel`` hook.

    Publishes ``config_settings`` on ``CommandMixin`` so the custom setuptools
    commands can read it, then hands the build over to setuptools' backend
    and returns whatever that backend returns.
    """
    _cmds.CommandMixin.config_settings = config_settings
    result = _orig.build_wheel(
        wheel_directory, config_settings, metadata_directory)
    return result
def build_editable(wheel_directory, config_settings=None,
                   metadata_directory=None):
    """Build-backend ``build_editable`` hook.

    Mirrors ``build_wheel``: the front-end's ``config_settings`` are stored on
    ``CommandMixin`` before the call is forwarded to setuptools' backend.
    """
    _cmds.CommandMixin.config_settings = config_settings
    result = _orig.build_editable(
        wheel_directory, config_settings, metadata_directory)
    return result

275
.pyproject/cmdclass.py Normal file
Просмотреть файл

@ -0,0 +1,275 @@
# -*- coding: utf-8 -*-
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
###########################################################################
import re
import os
import sys
import pathlib
import subprocess
from textwrap import dedent
from setuptools.command.build import build as _build
from setuptools.command.build_ext import build_ext as _build_ext
from setuptools.command.develop import develop as _develop
VSINSTALLDIR_NAME = 'VSINSTALLDIR'
ORTX_USER_OPTION = 'ortx-user-option'
def _load_cuda_version():
pattern = r"\bV\d+\.\d+\.\d+\b"
output = subprocess.check_output(["nvcc", "--version"]).decode("utf-8")
match = re.search(pattern, output)
if match:
vers = match.group()[1:].split('.')
return f"{vers[0]}.{vers[1]}" # only keep the major and minor version.
return None
def _load_vsdevcmd(project_root):
    """Make the Visual Studio build environment available to this process.

    When ``VSINSTALLDIR`` is not set, ``tools/get_vsdevcmd.ps1`` under
    ``project_root`` is run through PowerShell and every ``NAME=VALUE`` line
    it prints is copied into ``os.environ``.  When ``VSINSTALLDIR`` is already
    set, the function only verifies that ``cmake`` is on the executable path.

    Args:
        project_root: Root directory of the project (contains ``tools/``).

    Raises:
        SystemExit: ``VSINSTALLDIR`` is set but ``cmake`` cannot be found.
    """
    if os.environ.get(VSINSTALLDIR_NAME) is None:
        # NOTE(review): ' -noprofile' carries a leading space in the original
        # command line; kept as is -- confirm whether it is intentional.
        stdout, _ = subprocess.Popen([
            'powershell', ' -noprofile', '-executionpolicy',
            'bypass', '-f', project_root + '/tools/get_vsdevcmd.ps1', '-outputEnv', '1'],
            stdout=subprocess.PIPE, shell=False, universal_newlines=True).communicate()
        for line in stdout.splitlines():
            # Split on the first '=' only, so values that themselves contain
            # '=' are kept instead of being silently dropped.
            name, sep, value = line.partition('=')
            if sep and name:
                os.environ[name] = value
    else:
        import shutil
        if shutil.which('cmake') is None:
            raise SystemExit(
                "Cannot find cmake in the executable path, "
                "please run this script under Developer Command Prompt for VS.")
def prepare_env(project_root):
    """Prepare the build environment; only Windows needs any work."""
    if sys.platform != "win32":
        return
    _load_vsdevcmd(project_root)
def read_git_refs(project_root):
    """Inspect the git checkout located at ``project_root``.

    Returns:
        A ``(release_branch, HEAD)`` tuple: ``HEAD`` is the hash printed by
        ``git log -1 --format=%H`` and ``release_branch`` is True when a ref
        listed by ``git show-ref --head`` for that hash starts with
        ``refs/remotes/origin/rel-``.
    """
    def _git_lines(*git_args):
        # Run git inside the project and return its stdout split into lines.
        out, _ = subprocess.Popen(
            ['git'] + list(git_args),
            cwd=project_root,
            stdout=subprocess.PIPE, universal_newlines=True).communicate()
        return out.splitlines()

    release_branch = False
    HEAD = dedent(_git_lines('log', '-1', '--format=%H')[0]).strip('\n\r')

    for raw_line in _git_lines('show-ref', '--head'):
        ref_line = dedent(raw_line).strip('\n\r')
        if not ref_line.startswith(HEAD):
            continue
        _sha, ref_name = ref_line.split(' ')
        if ref_name.startswith('refs/remotes/origin/rel-'):
            release_branch = True
    return release_branch, HEAD
class CommandMixin:
    """Mixin that adds the ``--ortx-user-option`` option to a setuptools command.

    The option value is shared with the other commands through the class
    attribute ``config_settings``, which the build backend may also fill in
    from the front-end's config settings.
    """
    user_options = [
        (ORTX_USER_OPTION + '=', None, "extensions options for kernel building")
    ]
    # Shared by every command class; None until a backend hook or a command
    # line option provides a value.
    config_settings = None

    # noinspection PyAttributeOutsideInit
    def initialize_options(self) -> None:
        super().initialize_options()
        self.ortx_user_option = None

    def finalize_options(self) -> None:
        """Publish the command line option into the shared settings.

        Raises:
            RuntimeError: the option was given on the command line although
                the shared settings already carry a value for it.
        """
        if self.ortx_user_option is not None:
            settings = CommandMixin.config_settings
            # Only a real duplicate is an error: settings that exist but do
            # not hold the ortx option (e.g. other config keys forwarded by
            # the front-end) must not block the command line value.
            if settings is not None and ORTX_USER_OPTION in settings:
                raise RuntimeError(
                    f"Cannot pass {ORTX_USER_OPTION} several times, like as the command args and in backend API.")
            # Copy so the mapping handed over by the front-end is not mutated.
            merged = dict(settings) if settings is not None else {}
            merged[ORTX_USER_OPTION] = self.ortx_user_option
            CommandMixin.config_settings = merged

        super().finalize_options()
class CmdDevelop(CommandMixin, _develop):
    """``develop`` command extended with the ortx user option."""
    user_options = (
        getattr(_develop, 'user_options', []) + CommandMixin.user_options)
class CmdBuild(CommandMixin, _build):
    """``build`` command extended with the ortx user option."""
    user_options = (
        getattr(_build, 'user_options', []) + CommandMixin.user_options)

    # noinspection PyAttributeOutsideInit
    def finalize_options(self) -> None:
        """Force the Windows platform name and honour ``OCOS_SCB_DEBUG``."""
        # There is a bug in setuptools that prevents the build get the right platform name from arguments.
        # So, it cannot generate the correct wheel with the right arch in Official release pipeline.
        # Force plat_name to be 'win-amd64' in Windows to fix that,
        # since extensions cmake is only available on x64 for Windows now, it is not a problem to hardcode it.
        force_amd64 = sys.platform == "win32" and "arm" not in sys.version.lower()
        if force_amd64:
            self.plat_name = "win-amd64"

        debug_switch = os.environ.get('OCOS_SCB_DEBUG', None)
        if debug_switch == '1':
            self.debug = True

        super().finalize_options()
class CmdBuildCMakeExt(_build_ext):
    """``build_ext`` command that builds the native module through CMake.

    Build switches are read from ``CommandMixin.config_settings`` under the
    ``ortx-user-option`` key as a comma separated list, for example
    ``use-cuda,no-azure=0``.  A bare flag means 1; ``flag=value`` values are
    normalised to integers where possible so ``flag=0`` really disables it.
    """

    # Spellings accepted as explicit boolean values in ``flag=value``.
    _TRUE_WORDS = frozenset(('true', 'on', 'yes'))
    _FALSE_WORDS = frozenset(('false', 'off', 'no'))

    # noinspection PyAttributeOutsideInit
    def initialize_options(self):
        super().initialize_options()
        self.use_cuda = None
        self.no_azure = None
        self.no_opencv = None
        self.cc_debug = None

    @classmethod
    def _normalize_value(cls, raw_value):
        """Turn the text after ``=`` into an int when it reads like a switch.

        Integers and the usual boolean words become ``int``; anything else is
        returned unchanged as a string.
        """
        text = raw_value.strip()
        lowered = text.lower()
        if lowered in cls._TRUE_WORDS:
            return 1
        if lowered in cls._FALSE_WORDS:
            return 0
        try:
            return int(text)
        except ValueError:
            return raw_value

    def _parse_options(self, options):
        """Apply a comma separated option string to this command.

        Args:
            options: ``"a,b=1"`` style string, or a list of such strings
                (front-ends may pass a list when the key is repeated).

        Returns:
            ``self``, to allow chaining.

        Raises:
            RuntimeError: a key does not match an attribute of this command.
        """
        if not isinstance(options, str):
            options = ','.join(options)

        for segment in options.split(','):
            segment = segment.strip()
            if not segment:
                continue

            # Split on the first '=' only; the previous two-way unpack failed
            # on values that contained '=' themselves.
            key, has_value, raw_value = segment.partition('=')
            # Values used to stay strings, so "use-cuda=0" compared unequal to
            # the integer 0 and switched the feature ON.
            value = self._normalize_value(raw_value) if has_value else 1

            key = key.strip().replace('-', '_')
            if not hasattr(self, key):
                raise RuntimeError(
                    f"Unknown {ORTX_USER_OPTION} option value: {key}")
            setattr(self, key, value)
        return self

    def finalize_options(self) -> None:
        if CommandMixin.config_settings is not None:
            self._parse_options(
                CommandMixin.config_settings.get(ORTX_USER_OPTION, ""))
        if self.cc_debug:
            self.debug = True
        super().finalize_options()

    def run(self):
        """
        Perform build_cmake before doing the 'normal' stuff
        """
        for extension in self.extensions:
            if extension.name == 'onnxruntime_extensions._extensions_pydll':
                self.build_cmake(extension)

    def build_cmake(self, extension):
        """Configure and build ``extension`` with CMake.

        Raises:
            RuntimeError: CUDA was requested but ``nvcc`` could not be found.
        """
        project_dir = pathlib.Path().absolute()
        build_temp = pathlib.Path(self.build_temp)
        build_temp.mkdir(parents=True, exist_ok=True)
        ext_fullpath = pathlib.Path(
            self.get_ext_fullpath(extension.name)).absolute()

        config = 'RelWithDebInfo' if self.debug else 'Release'
        cmake_args = [
            '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' +
            str(ext_fullpath.parent.absolute()),
            '-DOCOS_BUILD_PYTHON=ON',
            '-DOCOS_PYTHON_MODULE_PATH=' + str(ext_fullpath),
            '-DCMAKE_BUILD_TYPE=' + config
        ]

        if self.no_opencv:
            # Disabling openCV can drastically reduce the build time.
            cmake_args += [
                '-DOCOS_ENABLE_OPENCV_CODECS=OFF',
                '-DOCOS_ENABLE_CV2=OFF',
                '-DOCOS_ENABLE_VISION=OFF']

        if self.no_azure is not None:
            azure_flag = "OFF" if self.no_azure == 1 else "ON"
            cmake_args += ['-DOCOS_ENABLE_AZURE=' + azure_flag]
            print("=> AzureOp build flag: " + azure_flag)

        if self.use_cuda is not None:
            cuda_flag = "OFF" if self.use_cuda == 0 else "ON"
            cmake_args += ['-DOCOS_USE_CUDA=' + cuda_flag]
            print("=> CUDA build flag: " + cuda_flag)
            # nvcc is only needed, and the package only marked as a CUDA
            # build, when CUDA is actually switched on.
            if cuda_flag == "ON":
                cuda_ver = _load_cuda_version()
                if cuda_ver is None:
                    raise RuntimeError(
                        "Cannot find nvcc in your env:path, use-cuda doesn't work")
                f_ver = ext_fullpath.parent / "_version.py"
                with f_ver.open('a') as _f:
                    # Quote the version: a bare 12.10 would be a float
                    # literal and read back as 12.1.
                    _f.writelines(["\n",
                                   f"cuda = \"{cuda_ver}\"",
                                   "\n"])

        # CMake lets you override the generator - we need to check this.
        # Can be set with Conda-Build, for example.
        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        # Adding CMake arguments set as environment variable
        # (needed e.g. to build for ARM OSx on conda-forge)
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [
                item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        if sys.platform != "win32":
            # Using Ninja-build since it a) is available as a wheel and b)
            # multithread automatically. MSVC would require all variables be
            # exported for Ninja to pick it up, which is a little tricky to do.
            # Users can override the generator with CMAKE_GENERATOR in CMake
            # 3.15+.
            if not cmake_generator or cmake_generator == "Ninja":
                try:
                    import ninja  # noqa: F401

                    ninja_executable_path = os.path.join(
                        ninja.BIN_DIR, "ninja")
                    cmake_args += [
                        "-GNinja",
                        f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
                    ]
                except ImportError:
                    # Ninja is optional; fall back to CMake's default generator.
                    pass

        if sys.platform.startswith("darwin"):
            # Cross-compile support for macOS - respect ARCHFLAGS if set
            archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
            if archs:
                cmake_args += [
                    "-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]

        # overwrite the Python module info if the auto-detection doesn't work.
        # export Python3_INCLUDE_DIRS=/opt/python/cp38-cp38
        # export Python3_LIBRARIES=/opt/python/cp38-cp38
        for env in ['Python3_INCLUDE_DIRS', 'Python3_LIBRARIES']:
            if env in os.environ:
                cmake_args.append("-D%s=%s" % (env, os.environ[env]))

        if self.debug:
            cmake_args += ['-DCC_OPTIMIZE=OFF']

        # the parallel build has to be limited on some Linux VM machine.
        cpu_number = os.environ.get('CPU_NUMBER')
        build_args = ['--config', config, '--parallel']
        if cpu_number:
            # Pass the job count as its own argument instead of gluing it to
            # the switch with an embedded space.
            build_args.append(cpu_number)

        cmake_exe = 'cmake'
        # unlike Linux/macOS, cmake pip package on Windows fails to build some 3rd party dependencies.
        # so we have to use the cmake installed from Visual Studio.
        vs_install_dir = os.environ.get(VSINSTALLDIR_NAME)
        if vs_install_dir:
            # Tolerate a VSINSTALLDIR value without the trailing separator.
            if not vs_install_dir.endswith(('\\', '/')):
                vs_install_dir += '\\'
            cmake_exe = vs_install_dir + \
                'Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin\\cmake.exe'
            # Add this cmake directory into PATH to make sure the child-process still find it.
            os.environ['PATH'] = os.path.dirname(
                cmake_exe) + os.pathsep + os.environ['PATH']

        self.spawn([cmake_exe, '-S', str(project_dir),
                    '-B', str(build_temp)] + cmake_args)
        if not self.dry_run:
            self.spawn([cmake_exe, '--build', str(build_temp)] + build_args)
# Command overrides, keyed by the setuptools command name they replace.
ortx_cmdclass = {
    'build': CmdBuild,
    'develop': CmdDevelop,
    'build_ext': CmdBuildCMakeExt,
}

Просмотреть файл

@ -1,44 +1,83 @@
# Build and Development # Build and Development
This project supports Python and can be built from source easily, or a simple cmake build without Python dependency. This project supports Python and can be built from source easily, or a simple cmake build without Python dependency.
## Python package ## Python package
The package contains all custom operators and some Python scripts to manipulate the ONNX models. The package contains all custom operators and some Python scripts to manipulate the ONNX models.
- Install Visual Studio with C++ development tools on Windows, or gcc(>8.0) for Linux or xcode for macOS, and cmake on the unix-like platform. (**hints**: in Windows platform, if cmake bundled in Visual Studio was used, please specify the set _VSDEVCMD=%ProgramFiles(x86)%\Microsoft Visual Studio\<VERSION_YEAR>\<Edition>\Common7\Tools\VsDevCmd.bat_)
- If running on Windows, ensure that long file names are enabled, both for the [operating system](https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd) and for git: `git config --system core.longpaths true` - Install Visual Studio with C++ development tools on Windows, or gcc(>8.0) for Linux or xcode for macOS, and cmake on
the unix-like platform.
- If running on Windows, ensure that long file names are enabled, both for
the [operating system](https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd)
and for git: `git config --system core.longpaths true`
- Prepare Python env and install the pip packages in the requirements.txt. - Prepare Python env and install the pip packages in the requirements.txt.
- `pip install .` to build and install the package.<br/> OR `pip install -e .` to install the package in the development mode, which is more friendly for the developer since the Python code change will take effect without having to copy the files to a different location in the disk.(**hints**: debug=1 in setup.cfg wil make C++ code be debuggable in a Python process.) - `pip install .` to build and install the package.<br/> OR `pip install -e .` to install the package in the development
mode, which is more friendly for the developer since the Python code change will take effect without having to copy
the files to a different location on the disk. (**hints**: debug=1 in setup.cfg will make C++ code debuggable in a
Python process.)
- Add the following argument `--config-settings "ortx-user-option=use-cuda"` in the pip command line to enable **CUDA**
kernels for the package.
- The flags that can be used in `--config-settings` are:
- use-cuda: enable CUDA kernel build in Python package.
- no-azure: disable AzureOp kernel build in Python package.
- no-opencv: disable operators based on OpenCV in build.
- cc_debug: Generate debug info for extensions binaries and disable C/C++ compiler optimization.
For example: `pip install . --config-settings "ortx-user-option=use-cuda,cc_debug"`. This command builds CUDA
kernels into the package and installs it, accompanied by the generation of debug information.
Test: Test:
- 'pip install -r requirements-dev.txt' to install pip packages for development. - 'pip install -r requirements-dev.txt' to install pip packages for development.
- run `pytest test` in the project root directory. - run `pytest test` in the project root directory.
For a complete list of verified build configurations see [here](<./ci_matrix.md>) For a complete list of verified build configurations see [here](<./ci_matrix.md>)
## Java package ## Java package
`bash ./build.sh -DOCOS_BUILD_JAVA=ON` to build jar package in out/<OS>/Release folder `bash ./build.sh -DOCOS_BUILD_JAVA=ON` to build jar package in out/<OS>/Release folder
## Android package ## Android package
- pre-requisites: [Android Studio](https://developer.android.com/studio) - pre-requisites: [Android Studio](https://developer.android.com/studio)
Use `./tools/android/build_aar.py` to build an Android AAR package. Use `./tools/android/build_aar.py` to build an Android AAR package.
## iOS package ## iOS package
Use `./tools/ios/build_xcframework.py` to build an iOS xcframework package. Use `./tools/ios/build_xcframework.py` to build an iOS xcframework package.
## NuGet package ## NuGet package
In order to build a local NuGet package for testing, run `nuget.exe pack ./nuget/WinOnlyNuget.nuspec` to build a NuGet package for Windows.
Note: you might need to update the src paths in the ./nuget/WinOnlyNuget.nuspec file if the appropriate ortextensions.dll files do not exist/are not in the given location. In order to build a local NuGet package for testing, run `nuget.exe pack ./nuget/WinOnlyNuget.nuspec` to build a NuGet
package for Windows.
Note: you might need to update the src paths in the ./nuget/WinOnlyNuget.nuspec file if the appropriate
ortextensions.dll files do not exist/are not in the given location.
## Web-Assembly ## Web-Assembly
ONNXRuntime-Extensions will be built as a static library and linked with ONNXRuntime due to the lack of a good dynamic linking mechanism in WASM. Here are two additional arguments [–-use_extensions and --extensions_overridden_path](https://github.com/microsoft/onnxruntime/blob/860ba8820b72d13a61f0d08b915cd433b738ffdc/tools/ci_build/build.py#L416) on building onnxruntime to include ONNXRuntime-Extensions footprint in the ONNXRuntime package.
ONNXRuntime-Extensions will be built as a static library and linked with ONNXRuntime due to the lack of a good dynamic
linking mechanism in WASM. Here are two additional
arguments [–-use_extensions and --extensions_overridden_path](https://github.com/microsoft/onnxruntime/blob/860ba8820b72d13a61f0d08b915cd433b738ffdc/tools/ci_build/build.py#L416)
on building onnxruntime to include ONNXRuntime-Extensions footprint in the ONNXRuntime package.
## The C++ shared library ## The C++ shared library
for any other cases, please run `build.bat` or `bash ./build.sh` to build the library. By default, the DLL or the library will be generated in the directory `out/<OS>/<FLAVOR>`. There is a unit test to help verify the build.
For any alternative scenarios, execute the following commands:
- On Windows: Run `build.bat`.
- On Unix-based systems: Execute `bash ./build.sh`.
The generated DLL or library is typically located in the `out/<OS>/<FLAVOR>` directory. To validate the build, utilize
the unit tests available in the `test/test_static_test` and `test/shared_test` directories.
**VC Runtime static linkage** **VC Runtime static linkage**
If you want to build the binary with VC Runtime static linkage, please add a parameter _-DCMAKE_MSVC_RUNTIME_LIBRARY="MultiThreaded$<$<CONFIG:Debug>:Debug>"_ on running build.bat If you want to build the binary with VC Runtime static linkage, please add a parameter _-DCMAKE_MSVC_RUNTIME_LIBRARY="
MultiThreaded$<$<CONFIG:Debug>:Debug>"_ on running build.bat
## Copyright guidance ## Copyright guidance
check this link https://docs.opensource.microsoft.com/releasing/general-guidance/copyright-headers/ for source file copyright header.
check this link https://docs.opensource.microsoft.com/releasing/general-guidance/copyright-headers/ for source file
copyright header.

Просмотреть файл

@ -2,7 +2,6 @@
# Licensed under the MIT License. See License.txt in the project root for # Licensed under the MIT License. See License.txt in the project root for
# license information. # license information.
############################################################################### ###############################################################################
""" """
The `onnxruntime-extensions` Python package offers an API that allows users to generate models for pre-processing and The `onnxruntime-extensions` Python package offers an API that allows users to generate models for pre-processing and
post-processing tasks. In addition, it also provides an API to register custom operations implemented in Python. post-processing tasks. In addition, it also provides an API to register custom operations implemented in Python.
@ -36,7 +35,7 @@ from ._ocos import enable_py_op
from ._ocos import expand_onnx_inputs from ._ocos import expand_onnx_inputs
from ._ocos import hook_model_op from ._ocos import hook_model_op
from ._ocos import default_opset_domain from ._ocos import default_opset_domain
from ._cuops import * # noqa from ._cuops import * # noqa
from ._ortapi2 import OrtPyFunction as PyOrtFunction # backward compatibility from ._ortapi2 import OrtPyFunction as PyOrtFunction # backward compatibility
from ._ortapi2 import OrtPyFunction, ort_inference, optimize_model, make_onnx_model from ._ortapi2 import OrtPyFunction, ort_inference, optimize_model, make_onnx_model
from ._ortapi2 import ONNXRuntimeError, ONNXRuntimeException from ._ortapi2 import ONNXRuntimeError, ONNXRuntimeException

Просмотреть файл

@ -5,11 +5,34 @@
""" """
_ocos.py: PythonOp implementation _ocos.py: PythonOp implementation
""" """
import os
import sys import sys
import copy import copy
import glob
import onnx import onnx
from onnx import helper from onnx import helper
def _search_cuda_dir():
paths = os.getenv('PATH', '').split(os.pathsep)
for path in paths:
for filename in glob.glob(os.path.join(path, 'cudart64*.dll')):
return os.path.dirname(filename)
return None
# Windows only: when the generated _version module carries a ``cuda``
# attribute, locate the CUDA runtime directory on PATH and register it as a
# DLL search directory. This runs at import time, before the native
# extension module is imported further down -- presumably so that module can
# resolve its CUDA runtime dependency.
if sys.platform == 'win32':
    from . import _version  # noqa: E402
    if hasattr(_version, 'cuda'):
        cuda_path = _search_cuda_dir()
        if cuda_path is None:
            # No directory on PATH contains a cudart64*.dll file.
            raise RuntimeError(
                "Cannot locate CUDA directory in the environment variable for GPU package")
        os.add_dll_directory(cuda_path)
from ._extensions_pydll import ( # noqa from ._extensions_pydll import ( # noqa
PyCustomOpDef, enable_py_op, add_custom_op, hash_64, default_opset_domain) PyCustomOpDef, enable_py_op, add_custom_op, hash_64, default_opset_domain)
@ -65,7 +88,7 @@ class Opdef:
if attrs is None: if attrs is None:
attrs = {} attrs = {}
elif isinstance(attrs, (list, tuple)): elif isinstance(attrs, (list, tuple)):
attrs = {k: PyCustomOpDef.dt_string for k in attrs} attrs = {k: PyCustomOpDef.dt_string for k in attrs}
opdef._nativedef.attrs = attrs opdef._nativedef.attrs = attrs
add_custom_op(opdef._nativedef) add_custom_op(opdef._nativedef)
return opdef return opdef
@ -115,7 +138,8 @@ def _ensure_opset_domain(model):
domain_missing = False domain_missing = False
if domain_missing: if domain_missing:
model.opset_import.extend([helper.make_operatorsetid(op_domain_name, 1)]) model.opset_import.extend(
[helper.make_operatorsetid(op_domain_name, 1)])
return model return model
@ -130,7 +154,8 @@ def expand_onnx_inputs(model, target_input, extra_nodes, new_inputs):
:return: The ONNX model after modification :return: The ONNX model after modification
""" """
graph = model.graph graph = model.graph
new_inputs = [n for n in graph.input if n.name != target_input] + new_inputs new_inputs = [n for n in graph.input if n.name !=
target_input] + new_inputs
new_nodes = list(model.graph.node) + extra_nodes new_nodes = list(model.graph.node) + extra_nodes
new_graph = helper.make_graph( new_graph = helper.make_graph(
new_nodes, graph.name, new_inputs, list(graph.output), list(graph.initializer)) new_nodes, graph.name, new_inputs, list(graph.output), list(graph.initializer))
@ -179,7 +204,8 @@ def hook_model_op(model, node_name, hook_func, input_types):
del hkd_model.graph.node[:] del hkd_model.graph.node[:]
hkd_model.graph.node.extend(repacked) hkd_model.graph.node.extend(repacked)
Opdef.create(hook_func, op_type=optype_name, inputs=input_types, outputs=input_types) Opdef.create(hook_func, op_type=optype_name,
inputs=input_types, outputs=input_types)
return _ensure_opset_domain(hkd_model) return _ensure_opset_domain(hkd_model)

Просмотреть файл

@ -1,6 +1,8 @@
[build-system] [build-system]
# Minimum requirements for the build system to execute. # Minimum requirements for the build system to execute.
requires = ["setuptools", "wheel", "numpy>=1.18.5", "ninja", "cmake"] # PEP 508 specifications. requires = ["setuptools", "wheel", "numpy>=1.18.5", "ninja", "cmake"] # PEP 508 specifications.
build-backend = "backend"
backend-path = [".pyproject"]
[tool.black] [tool.black]
line-length = 120 line-length = 120

185
setup.py
Просмотреть файл

@ -4,178 +4,24 @@
# license information. # license information.
########################################################################### ###########################################################################
from setuptools import setup, find_packages
from setuptools.command.build import build as _build
from setuptools.command.build_ext import build_ext as _build_ext
import re
import os import os
import sys import sys
import setuptools
import pathlib import pathlib
import subprocess import setuptools
from textwrap import dedent from textwrap import dedent
from setuptools import setup, find_packages
TOP_DIR = os.path.dirname(__file__) or os.getcwd() TOP_DIR = os.path.dirname(__file__) or os.getcwd()
PACKAGE_NAME = 'onnxruntime_extensions' PACKAGE_NAME = 'onnxruntime_extensions'
VSINSTALLDIR_NAME = 'VSINSTALLDIR'
# setup.py cannot be debugged in pip command line, so the command classes are refactored into another file
cmds_dir = pathlib.Path(TOP_DIR) / '.pyproject'
sys.path.append(str(cmds_dir))
# noinspection PyUnresolvedReferences
import cmdclass as _cmds # noqa: E402
def load_vsdevcmd(): _cmds.prepare_env(TOP_DIR)
if os.environ.get(VSINSTALLDIR_NAME) is None:
stdout, _ = subprocess.Popen([
'powershell', ' -noprofile', '-executionpolicy',
'bypass', '-f', TOP_DIR + '/tools/get_vsdevcmd.ps1', '-outputEnv', '1'],
stdout=subprocess.PIPE, shell=False, universal_newlines=True).communicate()
for line in stdout.splitlines():
kv_pair = line.split('=')
if len(kv_pair) == 2:
os.environ[kv_pair[0]] = kv_pair[1]
else:
import shutil
if shutil.which('cmake') is None:
raise SystemExit(
"Cannot find cmake in the executable path, "
"please run this script under Developer Command Prompt for VS.")
def read_git_refs():
    """Inspect the git checkout located at ``TOP_DIR``.

    Returns:
        A ``(release_branch, HEAD)`` tuple: ``HEAD`` is the hash printed by
        ``git log -1 --format=%H`` and ``release_branch`` is True when a ref
        listed by ``git show-ref --head`` for that hash starts with
        ``refs/remotes/origin/rel-``.
    """
    def _run_git(git_args):
        # Run git in the project root and hand back stdout as a list of lines.
        captured, _ = subprocess.Popen(
            ['git'] + git_args,
            cwd=TOP_DIR,
            stdout=subprocess.PIPE, universal_newlines=True).communicate()
        return captured.splitlines()

    release_branch = False
    first_line = _run_git(['log', '-1', '--format=%H'])[0]
    HEAD = dedent(first_line).strip('\n\r')

    for entry in _run_git(['show-ref', '--head']):
        entry = dedent(entry).strip('\n\r')
        if entry.startswith(HEAD):
            _hash, ref = entry.split(' ')
            if ref.startswith('refs/remotes/origin/rel-'):
                release_branch = True
    return release_branch, HEAD
class BuildCMakeExt(_build_ext):
    """``build_ext`` command that builds the native module through CMake.

    Build switches are taken from environment variables (``OCOS_NO_OPENCV``,
    ``OCOS_ENABLE_AZURE``, ``OCOS_NO_AZURE``, ``CMAKE_GENERATOR``,
    ``CMAKE_ARGS``, ``CPU_NUMBER``, ...).
    """

    def run(self):
        """
        Perform build_cmake before doing the 'normal' stuff
        """
        # Only the native python module is built here; other extensions are ignored.
        for extension in self.extensions:
            if extension.name == 'onnxruntime_extensions._extensions_pydll':
                self.build_cmake(extension)

    def build_cmake(self, extension):
        """Configure the project with CMake and, unless dry-run, build it."""
        # The current working directory is used as the CMake source directory.
        project_dir = pathlib.Path().absolute()
        build_temp = pathlib.Path(self.build_temp)
        build_temp.mkdir(parents=True, exist_ok=True)
        ext_fullpath = pathlib.Path(self.get_ext_fullpath(extension.name)).absolute()

        config = 'RelWithDebInfo' if self.debug else 'Release'
        cmake_args = [
            '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + str(ext_fullpath.parent.absolute()),
            '-DOCOS_BUILD_PYTHON=ON',
            '-DOCOS_PYTHON_MODULE_PATH=' + str(ext_fullpath),
            '-DCMAKE_BUILD_TYPE=' + config
        ]

        if os.environ.get('OCOS_NO_OPENCV') == '1':
            # Disabling openCV can drastically reduce the build time.
            cmake_args += [
                '-DOCOS_ENABLE_OPENCV_CODECS=OFF',
                '-DOCOS_ENABLE_CV2=OFF',
                '-DOCOS_ENABLE_VISION=OFF']

        # explicitly set the flag for AzureOp, despite the default value in CMakeLists.txt
        azure_flag = "ON" if os.environ.get('OCOS_ENABLE_AZURE') == '1' else None
        if azure_flag is None:
            # OCOS_NO_AZURE will be ignored if OCOS_ENABLE_AZURE is set.
            azure_flag = "OFF" if os.environ.get('OCOS_NO_AZURE') == '1' else None
        if azure_flag is not None:
            cmake_args += ['-DOCOS_ENABLE_AZURE=' + azure_flag]
            print("=> AzureOp build flag: " + azure_flag)

        # CMake lets you override the generator - we need to check this.
        # Can be set with Conda-Build, for example.
        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        # Adding CMake arguments set as environment variable
        # (needed e.g. to build for ARM OSx on conda-forge)
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        if sys.platform != "win32":
            # Using Ninja-build since it a) is available as a wheel and b)
            # multithreads automatically. MSVC would require all variables be
            # exported for Ninja to pick it up, which is a little tricky to do.
            # Users can override the generator with CMAKE_GENERATOR in CMake
            # 3.15+.
            if not cmake_generator or cmake_generator == "Ninja":
                try:
                    import ninja  # noqa: F401

                    ninja_executable_path = os.path.join(ninja.BIN_DIR, "ninja")
                    cmake_args += [
                        "-GNinja",
                        f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
                    ]
                except ImportError:
                    # Ninja is optional; without it no generator is forced here.
                    pass

        if sys.platform.startswith("darwin"):
            # Cross-compile support for macOS - respect ARCHFLAGS if set
            archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
            if archs:
                cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]

        # overwrite the Python module info if the auto-detection doesn't work.
        # export Python3_INCLUDE_DIRS=/opt/python/cp38-cp38
        # export Python3_LIBRARIES=/opt/python/cp38-cp38
        for env in ['Python3_INCLUDE_DIRS', 'Python3_LIBRARIES']:
            if env in os.environ:
                cmake_args.append("-D%s=%s" % (env, os.environ[env]))

        if self.debug:
            cmake_args += ['-DCC_OPTIMIZE=OFF']

        # the parallel build has to be limited on some Linux VM machine.
        cpu_number = os.environ.get('CPU_NUMBER')
        # NOTE(review): the job count is glued to '--parallel' with a space and
        # passed as a single argument -- confirm cmake accepts this form.
        build_args = [
            '--config', config,
            '--parallel' + ('' if cpu_number is None else ' ' + cpu_number)
        ]
        cmake_exe = 'cmake'
        # unlike Linux/macOS, cmake pip package on Windows fails to build some 3rd party dependencies.
        # so we have to use the cmake installed from Visual Studio.
        if os.environ.get(VSINSTALLDIR_NAME):
            # Plain concatenation: assumes the VSINSTALLDIR value ends with a
            # path separator -- TODO confirm.
            cmake_exe = os.environ[VSINSTALLDIR_NAME] + \
                'Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin\\cmake.exe'
            # Add this cmake directory into PATH to make sure the child-process still find it.
            os.environ['PATH'] = os.path.dirname(cmake_exe) + os.pathsep + os.environ['PATH']

        # Configure step, then the build step (skipped on dry-run).
        self.spawn([cmake_exe, '-S', str(project_dir), '-B', str(build_temp)] + cmake_args)
        if not self.dry_run:
            self.spawn([cmake_exe, '--build', str(build_temp)] + build_args)
class Build(_build):
    """``build`` command with a debug switch and a fixed Windows platform name."""

    def initialize_options(self) -> None:
        """Enable debug builds when ``OCOS_SCB_DEBUG`` equals ``'1'``."""
        super().initialize_options()
        debug_switch = os.environ.get('OCOS_SCB_DEBUG', None)
        if debug_switch == '1':
            self.debug = True

    def finalize_options(self) -> None:
        # There is a bug in setuptools that prevents the build get the right platform name from arguments.
        # So, it cannot generate the correct wheel with the right arch in Official release pipeline.
        # Force plat_name to be 'win-amd64' in Windows to fix that.
        # Since extensions cmake is only available on x64 for Windows now, it is not a problem to hardcode it.
        force_amd64 = sys.platform == "win32" and "arm" not in sys.version.lower()
        if force_amd64:
            self.plat_name = "win-amd64"
        super().finalize_options()
def read_requirements(): def read_requirements():
@ -195,7 +41,7 @@ def read_version():
return version_str return version_str
# is it a dev build or release? # is it a dev build or release?
rel_br, cid = read_git_refs() if os.path.isdir( rel_br, cid = _cmds.read_git_refs(TOP_DIR) if os.path.isdir(
os.path.join(TOP_DIR, '.git')) else (True, None) os.path.join(TOP_DIR, '.git')) else (True, None)
if rel_br: if rel_br:
@ -209,16 +55,13 @@ def read_version():
return version_str return version_str
def write_py_version(ortx_version): def write_py_version(ext_version):
text = ["# Generated by setup.py, DON'T MANUALLY UPDATE IT!\n", text = ["# Generated by setup.py, DON'T MANUALLY UPDATE IT!\n",
"__version__ = \"{}\"\n".format(ortx_version)] "__version__ = \"{}\"\n".format(ext_version)]
with (open(os.path.join(TOP_DIR, 'onnxruntime_extensions/_version.py'), "w")) as _f: with (open(os.path.join(TOP_DIR, 'onnxruntime_extensions/_version.py'), "w")) as _fver:
_f.writelines(text) _fver.writelines(text)
if sys.platform == "win32":
load_vsdevcmd()
ext_modules = [ ext_modules = [
setuptools.extension.Extension( setuptools.extension.Extension(
name=str('onnxruntime_extensions._extensions_pydll'), name=str('onnxruntime_extensions._extensions_pydll'),
@ -255,7 +98,7 @@ setup(
author_email='onnxruntime@microsoft.com', author_email='onnxruntime@microsoft.com',
url='https://github.com/microsoft/onnxruntime-extensions', url='https://github.com/microsoft/onnxruntime-extensions',
ext_modules=ext_modules, ext_modules=ext_modules,
cmdclass=dict(build_ext=BuildCMakeExt, build=Build), cmdclass=_cmds.ortx_cmdclass,
include_package_data=True, include_package_data=True,
install_requires=read_requirements(), install_requires=read_requirements(),
classifiers=[ classifiers=[

47
test/cuda/test_cudaops.py Normal file
Просмотреть файл

@ -0,0 +1,47 @@
import unittest
import numpy as np
from numpy.testing import assert_almost_equal
from onnx import helper, onnx_pb as onnx_proto
from onnxruntime_extensions import make_onnx_model
from onnxruntime_extensions import get_library_path as _get_library_path
import onnxruntime as _ort
class TestCudaOps(unittest.TestCase):
    """Runs the ``NegPos`` custom op through the CUDA execution provider."""

    @staticmethod
    def _create_test_model(domain='ai.onnx.contrib'):
        """Build a model: ``x`` -> Identity -> NegPos -> (``neg``, ``pos``)."""
        nodes = [
            helper.make_node('Identity', ['x'], ['identity1']),
            helper.make_node(
                'NegPos', ['identity1'], ['neg', 'pos'],
                domain=domain)
        ]

        input0 = helper.make_tensor_value_info(
            'x', onnx_proto.TensorProto.FLOAT, [])
        output1 = helper.make_tensor_value_info(
            'neg', onnx_proto.TensorProto.FLOAT, [])
        output2 = helper.make_tensor_value_info(
            'pos', onnx_proto.TensorProto.FLOAT, [])

        graph = helper.make_graph(nodes, 'test0', [input0], [output1, output2])
        model = make_onnx_model(graph)
        return model

    def test_cuda_negpos(self):
        # Without the CUDA provider the run would fail or quietly fall back to
        # another provider, so the result would say nothing about CUDA.
        if 'CUDAExecutionProvider' not in _ort.get_available_providers():
            self.skipTest("CUDAExecutionProvider is not available in this onnxruntime build")

        so = _ort.SessionOptions()
        so.register_custom_ops_library(_get_library_path())
        onnx_model = self._create_test_model()
        self.assertIn('op_type: "NegPos"', str(onnx_model))
        sess = _ort.InferenceSession(onnx_model.SerializeToString(),
                                     so,
                                     providers=['CUDAExecutionProvider'])
        # Make sure the session really runs on CUDA and did not fall back.
        self.assertIn('CUDAExecutionProvider', sess.get_providers())

        x = np.array([[0., 1., 1.5], [7., 8., -5.5]]).astype(np.float32)
        neg, pos = sess.run(None, {'x': x})
        # neg + pos must add back up to the input.
        diff = x - (neg + pos)
        assert_almost_equal(diff, np.zeros(diff.shape))
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()