Allow accelerator to instantiate the device (#5255)
When instantiating torch.device for HPU, it cannot be given an
"hpu:1"-style annotation, only "hpu". Moving this logic into the
accelerator solves the issue with a single-line change.
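
A minimal sketch of the pattern this commit adopts (hypothetical classes, not DeepSpeed's actual code): device-string construction lives in the accelerator, so a backend such as HPU, whose torch.device only accepts the bare type name, can ignore the index, while indexed backends keep formatting it.

import torch

class CudaLikeAccelerator:
    """Backend whose torch.device accepts an indexed name such as 'cuda:1'."""

    def device_name(self, device_index=None):
        if device_index is None:
            return 'cuda'
        return 'cuda:{}'.format(device_index)

    def device(self, device_index=None):
        return torch.device(self.device_name(device_index))

class HpuLikeAccelerator:
    """Backend whose torch.device only accepts the bare type name 'hpu'."""

    def device_name(self, device_index=None):
        # Ignore the index: per the commit message, torch.device('hpu:1')
        # is rejected by the Habana integration, while torch.device('hpu') works.
        return 'hpu'

    def device(self, device_index=None):
        return torch.device(self.device_name(device_index))

# Callers ask the accelerator for the device instead of building the string
# themselves, so the backend-specific difference stays in one place.
print(CudaLikeAccelerator().device(1))  # cuda:1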

---------

Co-authored-by: Logan Adams <[email protected]>
Co-authored-by: Olatunji Ruwase <[email protected]>
Co-authored-by: Joe Mayer <[email protected]>
4 people authored Aug 15, 2024
1 parent 4ba49dd commit eb07d41
Showing 4 changed files with 4 additions and 7 deletions.
1 change: 0 additions & 1 deletion .github/workflows/hpu-gaudi2.yml
@@ -68,7 +68,6 @@ jobs:
 (test_flops_profiler.py and test_flops_profiler_in_inference)
 test_get_optim_files.py
 test_groups.py
-test_init_on_device.py
 test_partition_balanced.py
 (test_adamw.py and TestAdamConfigs)
 test_coalesced_collectives.py
5 changes: 2 additions & 3 deletions accelerator/hpu_accelerator.py
@@ -42,9 +42,8 @@ def handles_memory_backpressure(self):
         return True
 
     def device_name(self, device_index=None):
-        if device_index is None:
-            return 'hpu'
-        return 'hpu:{}'.format(device_index)
+        # ignoring device_index.
+        return 'hpu'
 
     def device(self, device_index=None):
         return torch.device(self.device_name(device_index))
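
Rough expectation of the new behavior on a Gaudi machine (assumed, not output captured from this commit): the index is simply dropped, while other accelerators such as CUDA are expected to keep returning indexed names like 'cuda:1', which is why the formatting decision now belongs to each accelerator rather than to callers.

from deepspeed.accelerator import get_accelerator

accel = get_accelerator()    # resolves to the HPU accelerator on Gaudi
print(accel.device_name(1))  # 'hpu' -- device_index is ignored after this change
print(accel.device(1))       # hpu   -- torch.device built from the bare name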
4 changes: 2 additions & 2 deletions deepspeed/runtime/engine.py
@@ -1009,13 +1009,13 @@ def _set_distributed_vars(self, args):
         device_rank = args.device_rank if args is not None and hasattr(args, 'device_rank') else self.local_rank
         if device_rank >= 0:
             get_accelerator().set_device(device_rank)
-            self.device = torch.device(get_accelerator().device_name(), device_rank)
+            self.device = torch.device(get_accelerator().device_name(device_rank))
             self.world_size = dist.get_world_size()
             self.global_rank = dist.get_rank()
         else:
             self.world_size = 1
             self.global_rank = 0
-            self.device = torch.device(get_accelerator().device_name())
+            self.device = get_accelerator().device()
 
     # Configure based on command line arguments
     def _configure_with_arguments(self, args, mpu):
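
A simplified, hypothetical helper mirroring the two engine branches above, to show the intent: the engine forwards the rank (or asks for the default device) and lets the accelerator decide what torch.device to return.

import torch
from deepspeed.accelerator import get_accelerator

def resolve_engine_device(device_rank):
    # Simplified restatement of _set_distributed_vars after this commit;
    # distributed setup and set_device() are omitted.
    if device_rank >= 0:
        # The accelerator formats (or, for HPU, ignores) the index.
        return torch.device(get_accelerator().device_name(device_rank))
    # No usable rank: let the accelerator construct its default device.
    return get_accelerator().device()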
1 change: 0 additions & 1 deletion deepspeed/runtime/zero/utils.py
@@ -68,7 +68,6 @@ def get_lst_from_rank0(lst: List[int]) -> None:
     lst_tensor = torch.tensor(
         lst if dist.get_rank() == 0 else [-1] * len(lst),
         dtype=int,
-        # device=get_accelerator().current_device_name(),
         device=torch.device(get_accelerator().device_name(os.environ["LOCAL_RANK"])),
         requires_grad=False,
     )
