Fixed the Windows build.

Fixes applied:
- Remove some more ops that don't build on Windows.
- Remove the use of symlinks, which didn't work correctly, and replace them with
`shutil.copytree()`.
- Small fixes to make the C++ code compile.

Tested with Python 3.9 and CUDA 12.1.

---------

Co-authored-by: Costin Eseanu <costineseanu@gmail.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
This commit is contained in:
Costin Eseanu 2024-05-31 15:11:10 -07:00 коммит произвёл GitHub
Родитель 77c949421e
Коммит e7dd28a23d
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
4 изменённых файла: 10 добавлений и 26 удалений

Просмотреть файл

@ -1,22 +1,15 @@
@echo off @echo off
set DISTUTILS_USE_SDK=1
set DS_BUILD_AIO=0 set DS_BUILD_AIO=0
set DS_BUILD_CUTLASS_OPS=0 set DS_BUILD_CUTLASS_OPS=0
set DS_BUILD_EVOFORMER_ATTN=0 set DS_BUILD_EVOFORMER_ATTN=0
set DS_BUILD_FP_QUANTIZER=0 set DS_BUILD_FP_QUANTIZER=0
set DS_BUILD_INFERENCE_CORE_OPS=0
set DS_BUILD_RAGGED_DEVICE_OPS=0 set DS_BUILD_RAGGED_DEVICE_OPS=0
set DS_BUILD_SPARSE_ATTN=0 set DS_BUILD_SPARSE_ATTN=0
set DS_BUILD_TRANSFORMER_INFERENCE=0
echo Administrative permissions required. Detecting permissions...
net session >nul 2>&1
if %errorLevel% == 0 (
echo Success: Administrative permissions confirmed.
) else (
echo Failure: Current permissions inadequate.
goto end
)
python setup.py bdist_wheel python setup.py bdist_wheel

Просмотреть файл

@ -12,6 +12,7 @@
#define TILE (128 * 1024 * 1024) #define TILE (128 * 1024 * 1024)
#if defined(__AVX512__) or defined(__AVX256__) #if defined(__AVX512__) or defined(__AVX256__)
#include <immintrin.h>
template <typename T> template <typename T>
inline T readAs(const void* src) inline T readAs(const void* src)

Просмотреть файл

@ -241,7 +241,7 @@ std::vector<at::Tensor> quantized_reduction(at::Tensor& input_vals,
.device(at::kCUDA) .device(at::kCUDA)
.requires_grad(false); .requires_grad(false);
std::vector<long int> sz(input_vals.sizes().begin(), input_vals.sizes().end()); std::vector<int64_t> sz(input_vals.sizes().begin(), input_vals.sizes().end());
sz[sz.size() - 1] = sz.back() / devices_per_node; // num of GPU per nodes sz[sz.size() - 1] = sz.back() / devices_per_node; // num of GPU per nodes
const int elems_per_in_tensor = at::numel(input_vals) / devices_per_node; const int elems_per_in_tensor = at::numel(input_vals) / devices_per_node;
auto output = torch::empty(sz, output_options); auto output = torch::empty(sz, output_options);

Просмотреть файл

@ -19,6 +19,7 @@ The wheel will be located at: dist/*.whl
""" """
import os import os
import shutil
import sys import sys
import subprocess import subprocess
from setuptools import setup, find_packages from setuptools import setup, find_packages
@ -207,21 +208,10 @@ else:
git_hash = "unknown" git_hash = "unknown"
git_branch = "unknown" git_branch = "unknown"
def create_dir_symlink(src, dest):
if not os.path.islink(dest):
if os.path.exists(dest):
os.remove(dest)
assert not os.path.exists(dest)
os.symlink(src, dest)
if sys.platform == "win32": if sys.platform == "win32":
# This creates a symbolic links on Windows. shutil.copytree('.\\csrc', '.\\deepspeed\\ops')
# It needs Administrator privilege to create symlinks on Windows. shutil.copytree('.\\op_builder', '.\\deepspeed\\ops')
create_dir_symlink('.\\deepspeed\\ops\\csrc', '..\\..\\csrc') shutil.copytree('.\\accelerator', '.\\deepspeed\\accelerator')
create_dir_symlink('.\\deepspeed\\ops\\op_builder', '..\\..\\op_builder')
create_dir_symlink('.\\deepspeed\\accelerator', '..\\accelerator')
egg_info.manifest_maker.template = 'MANIFEST_win.in' egg_info.manifest_maker.template = 'MANIFEST_win.in'
# Parse the DeepSpeed version string from version.txt. # Parse the DeepSpeed version string from version.txt.