зеркало из https://github.com/microsoft/DeepSpeed.git
Fixed the Windows build. (#5596)
Fixed the Windows build.

Fixes applied:
- Remove some more ops that don't build on Windows.
- Remove the use of symlinks that didn't work correctly and replace with `shutil.copytree()`.
- Small fixes to make the C++ code compile.

Tested with Python 3.9 and CUDA 12.1.

---------

Co-authored-by: Costin Eseanu <costineseanu@gmail.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
This commit is contained in:
Родитель
77c949421e
Коммит
e7dd28a23d
|
@ -1,22 +1,15 @@
|
|||
@echo off
|
||||
|
||||
set DISTUTILS_USE_SDK=1
|
||||
|
||||
set DS_BUILD_AIO=0
|
||||
set DS_BUILD_CUTLASS_OPS=0
|
||||
set DS_BUILD_EVOFORMER_ATTN=0
|
||||
set DS_BUILD_FP_QUANTIZER=0
|
||||
set DS_BUILD_INFERENCE_CORE_OPS=0
|
||||
set DS_BUILD_RAGGED_DEVICE_OPS=0
|
||||
set DS_BUILD_SPARSE_ATTN=0
|
||||
|
||||
echo Administrative permissions required. Detecting permissions...
|
||||
|
||||
net session >nul 2>&1
|
||||
if %errorLevel% == 0 (
|
||||
echo Success: Administrative permissions confirmed.
|
||||
) else (
|
||||
echo Failure: Current permissions inadequate.
|
||||
goto end
|
||||
)
|
||||
|
||||
set DS_BUILD_TRANSFORMER_INFERENCE=0
|
||||
|
||||
python setup.py bdist_wheel
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#define TILE (128 * 1024 * 1024)
|
||||
#if defined(__AVX512__) or defined(__AVX256__)
|
||||
#include <immintrin.h>
|
||||
|
||||
template <typename T>
|
||||
inline T readAs(const void* src)
|
||||
|
|
|
@ -241,7 +241,7 @@ std::vector<at::Tensor> quantized_reduction(at::Tensor& input_vals,
|
|||
.device(at::kCUDA)
|
||||
.requires_grad(false);
|
||||
|
||||
std::vector<long int> sz(input_vals.sizes().begin(), input_vals.sizes().end());
|
||||
std::vector<int64_t> sz(input_vals.sizes().begin(), input_vals.sizes().end());
|
||||
sz[sz.size() - 1] = sz.back() / devices_per_node; // number of GPUs per node
|
||||
const int elems_per_in_tensor = at::numel(input_vals) / devices_per_node;
|
||||
auto output = torch::empty(sz, output_options);
|
||||
|
|
18
setup.py
18
setup.py
|
@ -19,6 +19,7 @@ The wheel will be located at: dist/*.whl
|
|||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import subprocess
|
||||
from setuptools import setup, find_packages
|
||||
|
@ -207,21 +208,10 @@ else:
|
|||
git_hash = "unknown"
|
||||
git_branch = "unknown"
|
||||
|
||||
|
||||
def create_dir_symlink(src, dest):
|
||||
if not os.path.islink(dest):
|
||||
if os.path.exists(dest):
|
||||
os.remove(dest)
|
||||
assert not os.path.exists(dest)
|
||||
os.symlink(src, dest)
|
||||
|
||||
|
||||
if sys.platform == "win32":
|
||||
# This creates symbolic links on Windows.
|
||||
# It needs Administrator privilege to create symlinks on Windows.
|
||||
create_dir_symlink('.\\deepspeed\\ops\\csrc', '..\\..\\csrc')
|
||||
create_dir_symlink('.\\deepspeed\\ops\\op_builder', '..\\..\\op_builder')
|
||||
create_dir_symlink('.\\deepspeed\\accelerator', '..\\accelerator')
|
||||
shutil.copytree('.\\csrc', '.\\deepspeed\\ops')
|
||||
shutil.copytree('.\\op_builder', '.\\deepspeed\\ops')
|
||||
shutil.copytree('.\\accelerator', '.\\deepspeed\\accelerator')
|
||||
egg_info.manifest_maker.template = 'MANIFEST_win.in'
|
||||
|
||||
# Parse the DeepSpeed version string from version.txt.
|
||||
|
|
Загрузка…
Ссылка в новой задаче