From 3c490f9cf45e6bf7841932cba3d8f3623a04c461 Mon Sep 17 00:00:00 2001
From: andyG <135115931+Andy666G@users.noreply.github.com>
Date: Fri, 2 Aug 2024 04:28:55 +0800
Subject: [PATCH] Use accelerator to replace cuda in setup and runner (#5769)

Use accelerator apis to select device in setup.py and set visible
devices env in runner.py

---------

Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
---
 deepspeed/launcher/runner.py | 13 +++++++------
 setup.py                     |  7 ++++---
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/deepspeed/launcher/runner.py b/deepspeed/launcher/runner.py
index 12dd629b1..f140d73ee 100755
--- a/deepspeed/launcher/runner.py
+++ b/deepspeed/launcher/runner.py
@@ -403,18 +403,19 @@ def main(args=None):
 
     resource_pool = fetch_hostfile(args.hostfile)
 
-    # respect CUDA_VISIBLE_DEVICES for a single node and no explicit resource filters
-    cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
-    if not resource_pool and len(cuda_visible_devices):
-        detected_str = f"Detected CUDA_VISIBLE_DEVICES={cuda_visible_devices}"
+    # respect VISIBLE_DEVICES for a single node and no explicit resource filters
+    visible_devices_env = get_accelerator().visible_devices_envs()[0]
+    visible_devices = os.environ.get(visible_devices_env, "")
+    if not resource_pool and len(visible_devices):
+        detected_str = f"Detected VISIBLE_DEVICES={visible_devices}"
         if len(args.include) or len(args.exclude) or args.num_nodes > 1 or args.num_gpus > 0:
             print(
                 f"{detected_str} but ignoring it because one or several of --include/--exclude/--num_gpus/--num_nodes cl args were used. If you want to use CUDA_VISIBLE_DEVICES don't pass any of these arguments to deepspeed."
             )
         else:
-            args.include = f"localhost:{cuda_visible_devices}"
+            args.include = f"localhost:{visible_devices}"
             print(f"{detected_str}: setting --include={args.include}")
-        del os.environ["CUDA_VISIBLE_DEVICES"]
+        del os.environ[visible_devices_env]
 
     if args.num_nodes >= 0 or args.num_gpus >= 0:
         if args.include != "" or args.exclude != "":
diff --git a/setup.py b/setup.py
index 183d42907..2b7555361 100755
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,8 @@ from op_builder import get_default_compute_capabilities, OpBuilder
 from op_builder.all_ops import ALL_OPS, accelerator_name
 from op_builder.builder import installed_cuda_version
 
+from accelerator import get_accelerator
+
 # Fetch rocm state.
 is_rocm_pytorch = OpBuilder.is_rocm_pytorch()
 rocm_version = OpBuilder.installed_rocm_version()
@@ -91,7 +93,7 @@ extras_require = {
 }
 
 # Add specific cupy version to both onebit extension variants.
-if torch_available and torch.cuda.is_available():
+if torch_available and get_accelerator().device_name() == 'cuda':
     cupy = None
     if is_rocm_pytorch:
         rocm_major, rocm_minor = rocm_version
@@ -120,7 +122,6 @@ cmdclass = {}
 
 # For any pre-installed ops force disable ninja.
 if torch_available:
-    from accelerator import get_accelerator
     use_ninja = is_env_set("DS_ENABLE_NINJA")
     cmdclass['build_ext'] = get_accelerator().build_extension().with_options(use_ninja=use_ninja)
 
@@ -131,7 +132,7 @@ else:
     TORCH_MAJOR = "0"
     TORCH_MINOR = "0"
 
-if torch_available and not torch.cuda.is_available():
+if torch_available and not get_accelerator().device_name() == 'cuda':
     # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486.
     print("[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
           "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "