diff --git a/.github/workflows/hpu-gaudi2.yml b/.github/workflows/hpu-gaudi2.yml
index 0272829e8..4e9ceb32b 100644
--- a/.github/workflows/hpu-gaudi2.yml
+++ b/.github/workflows/hpu-gaudi2.yml
@@ -68,7 +68,6 @@ jobs:
           (test_flops_profiler.py and test_flops_profiler_in_inference)
           test_get_optim_files.py
           test_groups.py
-          test_init_on_device.py
           test_partition_balanced.py
           (test_adamw.py and TestAdamConfigs)
           test_coalesced_collectives.py
diff --git a/accelerator/hpu_accelerator.py b/accelerator/hpu_accelerator.py
index 485b205f3..1f407e867 100644
--- a/accelerator/hpu_accelerator.py
+++ b/accelerator/hpu_accelerator.py
@@ -42,9 +42,8 @@ class HPU_Accelerator(DeepSpeedAccelerator):
         return True
 
     def device_name(self, device_index=None):
-        if device_index is None:
-            return 'hpu'
-        return 'hpu:{}'.format(device_index)
+        # ignoring device_index.
+        return 'hpu'
 
     def device(self, device_index=None):
         return torch.device(self.device_name(device_index))
diff --git a/deepspeed/runtime/engine.py b/deepspeed/runtime/engine.py
index d2839a8f5..27d294b3a 100644
--- a/deepspeed/runtime/engine.py
+++ b/deepspeed/runtime/engine.py
@@ -1009,13 +1009,13 @@ class DeepSpeedEngine(Module):
         device_rank = args.device_rank if args is not None and hasattr(args, 'device_rank') else self.local_rank
         if device_rank >= 0:
             get_accelerator().set_device(device_rank)
-            self.device = torch.device(get_accelerator().device_name(), device_rank)
+            self.device = torch.device(get_accelerator().device_name(device_rank))
             self.world_size = dist.get_world_size()
             self.global_rank = dist.get_rank()
         else:
             self.world_size = 1
             self.global_rank = 0
-            self.device = torch.device(get_accelerator().device_name())
+            self.device = get_accelerator().device()
 
     # Configure based on command line arguments
     def _configure_with_arguments(self, args, mpu):
diff --git a/deepspeed/runtime/zero/utils.py b/deepspeed/runtime/zero/utils.py
index fae725819..8f913d065 100755
--- a/deepspeed/runtime/zero/utils.py
+++ b/deepspeed/runtime/zero/utils.py
@@ -68,7 +68,6 @@ def get_lst_from_rank0(lst: List[int]) -> None:
     lst_tensor = torch.tensor(
         lst if dist.get_rank() == 0 else [-1] * len(lst),
         dtype=int,
-        # device=get_accelerator().current_device_name(),
        device=torch.device(get_accelerator().device_name(os.environ["LOCAL_RANK"])),
         requires_grad=False,
     )
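
The pattern the engine changes rely on is that the accelerator alone decides how (or whether) a device index maps into a device string, so callers pass the index into `device_name()` instead of passing it as a second argument to `torch.device()`. Below is a minimal sketch of that pattern, not part of the diff: `CPUStandin` is a hypothetical stand-in that returns `'cpu'` so the snippet runs without Habana hardware, where the patched `HPU_Accelerator` returns `'hpu'`.

```python
import torch


class CPUStandin:
    """Hypothetical stand-in mimicking the patched HPU_Accelerator; it uses
    'cpu' so the snippet runs anywhere, where the real class returns 'hpu'."""

    def device_name(self, device_index=None):
        # Like the patched HPU_Accelerator.device_name(): the index is
        # ignored and a bare device-type string comes back.
        return 'cpu'

    def device(self, device_index=None):
        # Both engine branches now funnel through device_name(), so the
        # accelerator alone controls whether an index appears in the name.
        return torch.device(self.device_name(device_index))


accel = CPUStandin()

# New engine style: hand the rank to device_name() and let the accelerator
# decide; for this accelerator the index is dropped entirely.
assert torch.device(accel.device_name(3)) == torch.device('cpu')

# Non-distributed branch: self.device = get_accelerator().device()
assert accel.device() == torch.device('cpu')
```

With the old engine code, `torch.device(device_name(), device_rank)` forced an explicit index onto every accelerator; routing the index through `device_name()` lets an accelerator such as HPU, which exposes a single unindexed device name, ignore it.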