зеркало из https://github.com/microsoft/DeepSpeed.git
Fixed the Windows build. (#5596)
Fixed the Windows build.

Fixes applied:
- Remove some more ops that don't build on Windows.
- Remove the use of symlinks that didn't work correctly and replace with `shutil.copytree()`.
- Small fixes to make the C++ code compile.

Tested with Python 3.9 and CUDA 12.1.

---------

Co-authored-by: Costin Eseanu <costineseanu@gmail.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
This commit is contained in:
Родитель
77c949421e
Коммит
e7dd28a23d
|
@ -1,22 +1,15 @@
|
||||||
@echo off
|
@echo off
|
||||||
|
|
||||||
|
set DISTUTILS_USE_SDK=1
|
||||||
|
|
||||||
set DS_BUILD_AIO=0
|
set DS_BUILD_AIO=0
|
||||||
set DS_BUILD_CUTLASS_OPS=0
|
set DS_BUILD_CUTLASS_OPS=0
|
||||||
set DS_BUILD_EVOFORMER_ATTN=0
|
set DS_BUILD_EVOFORMER_ATTN=0
|
||||||
set DS_BUILD_FP_QUANTIZER=0
|
set DS_BUILD_FP_QUANTIZER=0
|
||||||
|
set DS_BUILD_INFERENCE_CORE_OPS=0
|
||||||
set DS_BUILD_RAGGED_DEVICE_OPS=0
|
set DS_BUILD_RAGGED_DEVICE_OPS=0
|
||||||
set DS_BUILD_SPARSE_ATTN=0
|
set DS_BUILD_SPARSE_ATTN=0
|
||||||
|
set DS_BUILD_TRANSFORMER_INFERENCE=0
|
||||||
echo Administrative permissions required. Detecting permissions...
|
|
||||||
|
|
||||||
net session >nul 2>&1
|
|
||||||
if %errorLevel% == 0 (
|
|
||||||
echo Success: Administrative permissions confirmed.
|
|
||||||
) else (
|
|
||||||
echo Failure: Current permissions inadequate.
|
|
||||||
goto end
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
python setup.py bdist_wheel
|
python setup.py bdist_wheel
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#define TILE (128 * 1024 * 1024)
|
#define TILE (128 * 1024 * 1024)
|
||||||
#if defined(__AVX512__) or defined(__AVX256__)
|
#if defined(__AVX512__) or defined(__AVX256__)
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline T readAs(const void* src)
|
inline T readAs(const void* src)
|
||||||
|
|
|
@ -241,7 +241,7 @@ std::vector<at::Tensor> quantized_reduction(at::Tensor& input_vals,
|
||||||
.device(at::kCUDA)
|
.device(at::kCUDA)
|
||||||
.requires_grad(false);
|
.requires_grad(false);
|
||||||
|
|
||||||
std::vector<long int> sz(input_vals.sizes().begin(), input_vals.sizes().end());
|
std::vector<int64_t> sz(input_vals.sizes().begin(), input_vals.sizes().end());
|
||||||
sz[sz.size() - 1] = sz.back() / devices_per_node; // num of GPU per nodes
|
sz[sz.size() - 1] = sz.back() / devices_per_node; // num of GPU per nodes
|
||||||
const int elems_per_in_tensor = at::numel(input_vals) / devices_per_node;
|
const int elems_per_in_tensor = at::numel(input_vals) / devices_per_node;
|
||||||
auto output = torch::empty(sz, output_options);
|
auto output = torch::empty(sz, output_options);
|
||||||
|
|
18
setup.py
18
setup.py
|
@ -19,6 +19,7 @@ The wheel will be located at: dist/*.whl
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import subprocess
|
import subprocess
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
@ -207,21 +208,10 @@ else:
|
||||||
git_hash = "unknown"
|
git_hash = "unknown"
|
||||||
git_branch = "unknown"
|
git_branch = "unknown"
|
||||||
|
|
||||||
|
|
||||||
def create_dir_symlink(src, dest):
|
|
||||||
if not os.path.islink(dest):
|
|
||||||
if os.path.exists(dest):
|
|
||||||
os.remove(dest)
|
|
||||||
assert not os.path.exists(dest)
|
|
||||||
os.symlink(src, dest)
|
|
||||||
|
|
||||||
|
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
# This creates a symbolic links on Windows.
|
shutil.copytree('.\\csrc', '.\\deepspeed\\ops')
|
||||||
# It needs Administrator privilege to create symlinks on Windows.
|
shutil.copytree('.\\op_builder', '.\\deepspeed\\ops')
|
||||||
create_dir_symlink('.\\deepspeed\\ops\\csrc', '..\\..\\csrc')
|
shutil.copytree('.\\accelerator', '.\\deepspeed\\accelerator')
|
||||||
create_dir_symlink('.\\deepspeed\\ops\\op_builder', '..\\..\\op_builder')
|
|
||||||
create_dir_symlink('.\\deepspeed\\accelerator', '..\\accelerator')
|
|
||||||
egg_info.manifest_maker.template = 'MANIFEST_win.in'
|
egg_info.manifest_maker.template = 'MANIFEST_win.in'
|
||||||
|
|
||||||
# Parse the DeepSpeed version string from version.txt.
|
# Parse the DeepSpeed version string from version.txt.
|
||||||
|
|
Загрузка…
Ссылка в новой задаче