2020-02-01 02:57:11 +03:00
|
|
|
"""
|
|
|
|
Copyright 2020 The Microsoft DeepSpeed Team

DeepSpeed library

To build a wheel on Windows:
    1. Install PyTorch, such as PyTorch 1.12 + CUDA 11.6
    2. Install the Visual C++ build tools
    3. Include the CUDA toolkit
    4. Launch a cmd console with Administrator privileges (needed to create the required symlink folders)

Create a new wheel via the following command:
    build_win.bat

The wheel will be located at: dist/*.whl
|
|
|
|
"""
|
|
|
|
|
2020-03-12 23:08:58 +03:00
|
|
|
import os
|
2021-05-24 11:10:39 +03:00
|
|
|
import sys
|
2020-09-02 04:06:15 +03:00
|
|
|
import subprocess
|
2020-02-01 02:57:11 +03:00
|
|
|
from setuptools import setup, find_packages
|
2022-01-19 03:25:01 +03:00
|
|
|
from setuptools.command import egg_info
|
2021-03-08 23:54:54 +03:00
|
|
|
import time
|
2020-11-13 00:58:14 +03:00
|
|
|
|
2021-06-17 00:18:37 +03:00
|
|
|
# Probe for PyTorch. A missing install is not fatal here: it is recorded in
# ``torch_available`` and reported with a warning.
try:
    import torch
    from torch.utils.cpp_extension import BuildExtension
except ImportError:
    torch_available = False
    print('[WARNING] Unable to import torch, pre-compiling ops will be disabled. '
          'Please visit https://pytorch.org/ to see how to properly install torch on your system.')
else:
    torch_available = True
|
2020-09-02 04:06:15 +03:00
|
|
|
|
2022-03-03 04:53:35 +03:00
|
|
|
from op_builder import ALL_OPS, get_default_compute_capabilities, OpBuilder
|
2022-11-08 21:17:03 +03:00
|
|
|
from op_builder.builder import installed_cuda_version
|
2022-03-03 04:53:35 +03:00
|
|
|
|
|
|
|
# Ask the op builder for the ROCm state once; both values are reused below.
# (Evaluated left to right: is_rocm_pytorch() first, then installed_rocm_version().)
is_rocm_pytorch, rocm_version = (
    OpBuilder.is_rocm_pytorch(),
    OpBuilder.installed_rocm_version(),
)
|
2020-09-02 04:06:15 +03:00
|
|
|
|
2021-07-29 08:42:27 +03:00
|
|
|
# ANSI escape sequences used to render the error tag in red.
RED_START = '\033[31m'
RED_END = '\033[0m'
ERROR = f"{RED_START} [ERROR] {RED_END}"


def abort(msg):
    """Print ``msg`` prefixed with the red error tag, then stop the build.

    Args:
        msg: Human-readable description of the fatal problem.

    Raises:
        AssertionError: Always, carrying ``msg``.
    """
    print(f"{ERROR} {msg}")
    # Raise explicitly instead of ``assert False``: assert statements are
    # compiled out under ``python -O``, which would let the build carry on
    # past a fatal error.
    raise AssertionError(msg)
|
|
|
|
|
2020-09-02 04:06:15 +03:00
|
|
|
|
|
|
|
def fetch_requirements(path):
    """Read a requirements file and return its requirement lines.

    Args:
        path: Path of the requirements file to read.

    Returns:
        A list with one whitespace-stripped string per line, in file order.
        Blank lines and comment-only lines (starting with ``#``) are skipped
        so they do not show up as empty/bogus entries in the dependency lists.
    """
    with open(path, 'r') as fd:
        stripped_lines = (line.strip() for line in fd)
        return [req for req in stripped_lines if req and not req.startswith('#')]
|
|
|
|
|
|
|
|
|
|
|
|
# Dependencies that are always installed.
install_requires = fetch_requirements('requirements/requirements.txt')

# Optional extras. Each entry below maps an extra name to the requirements file
# that lists its dependencies.
extras_require = {'1bit': []}  # add cupy based on cuda/rocm version
for _extra_name, _requirements_file in (
        ('1bit_mpi', 'requirements/requirements-1bit-mpi.txt'),
        ('readthedocs', 'requirements/requirements-readthedocs.txt'),
        ('dev', 'requirements/requirements-dev.txt'),
        ('autotuning', 'requirements/requirements-autotuning.txt'),
        ('autotuning_ml', 'requirements/requirements-autotuning-ml.txt'),
        ('sparse_attn', 'requirements/requirements-sparse_attn.txt'),
        ('inf', 'requirements/requirements-inf.txt'),
        ('sd', 'requirements/requirements-sd.txt'),
):
    extras_require[_extra_name] = fetch_requirements(_requirements_file)
|
2020-09-02 04:06:15 +03:00
|
|
|
|
2021-11-11 19:57:17 +03:00
|
|
|
# Add specific cupy version to both onebit extension variants
if torch_available and torch.cuda.is_available():
    if not is_rocm_pytorch:
        # CUDA build: the package name embeds the installed CUDA version digits.
        cuda_digits = ''.join(str(part) for part in installed_cuda_version())
        cupy = f"cupy-cuda{cuda_digits}"
    else:
        rocm_major, rocm_minor = rocm_version
        # XXX cupy support for rocm 5 is not available yet
        cupy = f"cupy-rocm-{rocm_major}-{rocm_minor}" if rocm_major <= 4 else None

    if cupy:
        for onebit_extra in ('1bit', '1bit_mpi'):
            extras_require[onebit_extra].append(cupy)
|
2020-11-12 22:51:38 +03:00
|
|
|
|
|
|
|
# Make an [all] extra that installs all needed dependencies
all_extras = set()
for extra_reqs in extras_require.values():
    all_extras.update(extra_reqs)
# Sorted so the generated package metadata is identical from one build to the
# next (plain set iteration order can change between interpreter runs).
extras_require['all'] = sorted(all_extras)
|
2020-02-01 02:57:11 +03:00
|
|
|
|
|
|
|
cmdclass = {}

if torch_available:
    # For any pre-installed ops force disable ninja
    cmdclass['build_ext'] = BuildExtension.with_options(use_ninja=False)
    # Only the first two components of the torch version are kept.
    TORCH_MAJOR, TORCH_MINOR = torch.__version__.split('.')[:2]
else:
    # Placeholder version used when torch could not be imported.
    TORCH_MAJOR = TORCH_MINOR = "0"
|
2020-03-12 23:08:58 +03:00
|
|
|
|
2021-06-17 00:18:37 +03:00
|
|
|
if torch_available and not torch.cuda.is_available():
    # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486
    no_cuda_warning = (
        "[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
        "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "
        "(compute capabilities 6.0, 6.1, 6.2)")
    print(no_cuda_warning)
    # Respect an arch list supplied by the user; only fill in a default when unset.
    if "TORCH_CUDA_ARCH_LIST" not in os.environ:
        os.environ["TORCH_CUDA_ARCH_LIST"] = get_default_compute_capabilities()
|
2020-03-12 23:08:58 +03:00
|
|
|
|
2020-09-02 04:06:15 +03:00
|
|
|
# Extension modules to pre-compile; filled in by the op selection loop.
ext_modules = []

# Default to pre-install kernels to false so we rely on JIT on Linux, opposite on Windows.
BUILD_OP_PLATFORM = int(sys.platform == "win32")
# DS_BUILD_OPS, when set, overrides the platform default.
BUILD_OP_DEFAULT = int(os.getenv('DS_BUILD_OPS', BUILD_OP_PLATFORM))
print(f"DS_BUILD_OPS={BUILD_OP_DEFAULT}")

if BUILD_OP_DEFAULT:
    assert torch_available, "Unable to pre-compile ops without torch installed. Please install torch before attempting to pre-compile ops."
|
|
|
|
|
2020-09-02 04:06:15 +03:00
|
|
|
|
|
|
|
def command_exists(cmd):
    """Return whether ``cmd`` resolves to an executable.

    The lookup is done with ``shutil.which`` on every platform, so the command
    itself is never executed and no child process or pipe is left behind.
    The previous implementation ran ``cmd`` on Windows and treated exit
    status 1 as "exists", which only holds for programs that happen to exit
    with 1 when called without arguments.

    Args:
        cmd: Name of the program to look up on ``PATH`` (or a path to it).

    Returns:
        ``True`` if an executable was found, ``False`` otherwise.

    Note:
        Shell builtins and aliases are not executables and are therefore
        reported as missing.
    """
    import shutil  # local import: only this helper needs it

    return shutil.which(cmd) is not None
|
2020-11-12 22:51:38 +03:00
|
|
|
|
|
|
|
|
2021-07-29 08:42:27 +03:00
|
|
|
def op_envvar(op_name):
    """Return the ``BUILD_VAR`` attribute of the builder registered for ``op_name``.

    Args:
        op_name: Key of the op in ``ALL_OPS``.

    Returns:
        The builder's ``BUILD_VAR`` value (used by callers as an environment
        variable name).

    Raises:
        AssertionError: If the builder has no ``BUILD_VAR`` attribute.
    """
    op_builder_obj = ALL_OPS[op_name]
    assert hasattr(op_builder_obj, 'BUILD_VAR'), f"{op_name} is missing BUILD_VAR field"
    return op_builder_obj.BUILD_VAR
|
|
|
|
|
|
|
|
|
|
|
|
def op_enabled(op_name):
    """Return the integer build flag for ``op_name``.

    The flag is read from the environment variable named by
    ``op_envvar(op_name)``; when that variable is unset, ``BUILD_OP_DEFAULT``
    is used instead.

    Args:
        op_name: Key of the op in ``ALL_OPS``.

    Returns:
        The flag converted with ``int()``.
    """
    flag = os.environ.get(op_envvar(op_name), BUILD_OP_DEFAULT)
    return int(flag)
|
|
|
|
|
|
|
|
|
2021-07-29 08:42:27 +03:00
|
|
|
# Per-op bookkeeping: which ops report themselves compatible, and which ones
# are going to be pre-compiled as extension modules.
compatible_ops = {name: False for name in ALL_OPS.keys()}
install_ops = {name: False for name in ALL_OPS.keys()}

for op_name, builder in ALL_OPS.items():
    op_compatible = builder.is_compatible()
    compatible_ops[op_name] = op_compatible
    op_requested = op_enabled(op_name)

    # If op is requested but not available, throw an error
    if op_requested and not op_compatible:
        env_var = op_envvar(op_name)
        # Only hint at the opt-out variable when the user did not set it themselves.
        if env_var not in os.environ:
            builder.warning(f"One can disable {op_name} with {env_var}=0")
        abort(f"Unable to pre-compile {op_name}")

    if op_compatible:
        if op_requested:
            # If op install enabled, add builder to extensions
            assert torch_available, f"Unable to pre-compile {op_name}, please first install torch"
            install_ops[op_name] = op_requested
            ext_modules.append(builder.builder())
        elif is_rocm_pytorch:
            # if op is compatible but install is not enabled (JIT mode)
            builder.hipify_extension()

print(f'Install Ops={install_ops}')
|
2020-09-02 04:06:15 +03:00
|
|
|
|
|
|
|
# Write out version/git info
git_hash_cmd = "git rev-parse --short HEAD"
git_branch_cmd = "git rev-parse --abbrev-ref HEAD"

# Fallback values, kept whenever git is not consulted or a git command fails.
git_hash = "unknown"
git_branch = "unknown"

if command_exists('git') and 'DS_BUILD_STRING' not in os.environ:
    try:
        queried_hash = subprocess.check_output(git_hash_cmd, shell=True).decode('utf-8').strip()
        queried_branch = subprocess.check_output(git_branch_cmd, shell=True).decode('utf-8').strip()
    except subprocess.CalledProcessError:
        # Either command failing leaves both values at "unknown".
        pass
    else:
        git_hash, git_branch = queried_hash, queried_branch
|
2020-11-12 22:51:38 +03:00
|
|
|
|
2021-05-25 08:53:44 +03:00
|
|
|
|
|
|
|
def create_dir_symlink(src, dest):
    """Make ``dest`` a symbolic link pointing at ``src``.

    Nothing is done when ``dest`` is already a symlink. Otherwise an existing
    ``dest`` is deleted with ``os.remove`` before the link is created.

    Args:
        src: Target the link should point to.
        dest: Path at which the link is created.
    """
    if os.path.islink(dest):
        return

    if os.path.exists(dest):
        os.remove(dest)
    assert not os.path.exists(dest)
    os.symlink(src, dest)
|
|
|
|
|
|
|
|
|
|
|
|
if sys.platform == "win32":
    # This creates symbolic links on Windows.
    # It needs Administrator privilege to create symlinks on Windows.
    for _link_target, _link_path in (
            ('..\\..\\csrc', '.\\deepspeed\\ops\\csrc'),
            ('..\\..\\op_builder', '.\\deepspeed\\ops\\op_builder'),
    ):
        create_dir_symlink(_link_target, _link_path)
    # Use the Windows-specific manifest template for egg_info.
    egg_info.manifest_maker.template = 'MANIFEST_win.in'
|
2021-05-25 08:53:44 +03:00
|
|
|
|
2020-11-12 22:51:38 +03:00
|
|
|
# Parse the DeepSpeed version string from version.txt
# (read inside a ``with`` block so the file handle is closed deterministically)
with open('version.txt', 'r') as fd:
    version_str = fd.read().strip()

# Build specifiers like .devX can be added at install time. Otherwise, add the git hash.
# example: DS_BUILD_STRING=".dev20201022" python setup.py sdist bdist_wheel

# Building wheel for distribution, update version file
if 'DS_BUILD_STRING' in os.environ:
    # Build string env specified, probably building for distribution
    build_string = os.environ['DS_BUILD_STRING']
    # Persist the build string in build.txt; the branch below reads it back
    # when DS_BUILD_STRING is not set.
    with open('build.txt', 'w') as fd:
        fd.write(build_string)
    version_str += build_string
elif os.path.isfile('build.txt'):
    # build.txt exists, probably installing from distribution
    with open('build.txt', 'r') as fd:
        version_str += fd.read().strip()
else:
    # None of the above, probably installing from source
    version_str += f'+{git_hash}'
|
|
|
|
|
|
|
|
torch_version = f"{TORCH_MAJOR}.{TORCH_MINOR}"

# Defaults, kept when torch is missing or the matching backend is absent
# (cuda_version / hip_version stay at 0.0 if cpu-only).
bf16_support = False
cuda_version = nccl_version = hip_version = "0.0"

if torch_available:
    if torch.version.cuda is not None:
        cuda_version = ".".join(torch.version.cuda.split('.')[:2])
        if sys.platform != "win32":
            nccl_raw = torch.cuda.nccl.version()
            if isinstance(nccl_raw, int):
                # Integer form: the first two digits are taken as major.minor.
                # This will break if minor version > 9
                nccl_version = ".".join(str(nccl_raw)[:2])
            else:
                # Sequence form: use its first two components.
                nccl_version = ".".join(map(str, nccl_raw[:2]))
        if hasattr(torch.cuda, 'is_bf16_supported') and torch.cuda.is_available():
            bf16_support = torch.cuda.is_bf16_supported()

    if getattr(torch.version, 'hip', None) is not None:
        hip_version = ".".join(torch.version.hip.split('.')[:2])

# Snapshot of the torch environment seen at build time.
torch_info = dict(
    version=torch_version,
    bf16_support=bf16_support,
    cuda_version=cuda_version,
    nccl_version=nccl_version,
    hip_version=hip_version,
)
|
2020-11-12 22:51:38 +03:00
|
|
|
|
|
|
|
print(f"version={version_str}, git_hash={git_hash}, git_branch={git_branch}")

# Record the build-time facts as plain Python assignments in a generated module.
version_info_lines = [
    f"version='{version_str}'\n",
    f"git_hash='{git_hash}'\n",
    f"git_branch='{git_branch}'\n",
    f"installed_ops={install_ops}\n",
    f"compatible_ops={compatible_ops}\n",
    f"torch_info={torch_info}\n",
]
with open('deepspeed/git_version_info_installed.py', 'w') as fd:
    fd.writelines(version_info_lines)

# Echo the resolved build configuration.
print(f'install_requires={install_requires}')
print(f'compatible_ops={compatible_ops}')
print(f'ext_modules={ext_modules}')
|
2020-02-01 02:57:11 +03:00
|
|
|
|
2020-11-25 20:43:53 +03:00
|
|
|
# Parse README.md to make long_description for PyPI page.
thisdir = os.path.abspath(os.path.dirname(__file__))
readme_path = os.path.join(thisdir, 'README.md')
with open(readme_path, encoding='utf-8') as readme_file:
    readme_text = readme_file.read()
|
|
|
|
|
2021-03-08 23:54:54 +03:00
|
|
|
start_time = time.time()

# Top-level directories that are not part of the installed package.
excluded_packages = [
    "azure",
    "csrc",
    "docker",
    "docs",
    "examples",
    "op_builder",
    "release",
    "requirements",
    "scripts",
    "tests",
]

# Command-line entry points shipped as scripts.
console_scripts = [
    'bin/deepspeed',
    'bin/deepspeed.pt',
    'bin/ds',
    'bin/ds_ssh',
    'bin/ds_report',
    'bin/ds_bench',
    'bin/dsr',
    'bin/ds_elastic',
]

python_classifiers = [
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'Programming Language :: Python :: 3.9',
    'Programming Language :: Python :: 3.10',
]

project_links = {
    'Documentation': 'https://deepspeed.readthedocs.io',
    'Source': 'https://github.com/microsoft/DeepSpeed',
}

setup(
    name='deepspeed',
    version=version_str,
    description='DeepSpeed library',
    long_description=readme_text,
    long_description_content_type='text/markdown',
    author='DeepSpeed Team',
    author_email='deepspeed@microsoft.com',
    url='http://deepspeed.ai',
    project_urls=project_links,
    install_requires=install_requires,
    extras_require=extras_require,
    packages=find_packages(exclude=excluded_packages),
    include_package_data=True,
    scripts=console_scripts,
    classifiers=python_classifiers,
    license='MIT',
    ext_modules=ext_modules,
    cmdclass=cmdclass,
)

# Report the wall-clock time spent since start_time was taken.
end_time = time.time()
print(f'deepspeed build time = {end_time - start_time} secs')
|