Fix torch.cuda.set_device (#54)

gty111 · web-flow · commit d872eb2d4fd2 · 2025-05-06T21:48:06.000+08:00
diff --git a/gllm/dist_utils.py b/gllm/dist_utils.py
@@ -20,7 +20,7 @@ def recv_tensor(dtype, src):
     tensor_shape = [None for _ in range(dim[0])]
     dist.recv_object_list(tensor_shape,src)
     # recv tensor
-    tensor = torch.zeros(torch.Size(tensor_shape),dtype=dtype,device=f'cuda:{dist.get_rank()}')
+    tensor = torch.zeros(torch.Size(tensor_shape),dtype=dtype,device=f'cuda:{get_local_rank()}')
     dist.recv(tensor,src)
     return tensor
 
@@ -33,7 +33,7 @@ def send_pp_data(output, dst):
         dist.isend(output,dst)
 
 def recv_pp_data(src, dtype, shape, has_residual):
-    hidden_states = torch.zeros(torch.Size(shape),dtype=dtype,device=f'cuda:{dist.get_rank()}')
+    hidden_states = torch.zeros(torch.Size(shape),dtype=dtype,device=f'cuda:{get_local_rank()}')
     if has_residual:
         residual = hidden_states.clone().detach()
         hidden_states_future = dist.irecv(hidden_states,src)
diff --git a/gllm/model_loader.py b/gllm/model_loader.py
@@ -117,5 +117,6 @@ def load_model(self, mp_load_progress=None):
             return model
         else:
             assert self.load_format == 'dummy'
+            torch.set_default_device('cuda')
             model = model_type(self.config)
             return model
diff --git a/gllm/worker.py b/gllm/worker.py
@@ -47,7 +47,7 @@ def init(self):
         
         init_dist(self.pp_size, self.local_rank, self.pp_rank, self.master_addr, 
                   self.master_port, self.assigned_layers)
-        torch.cuda.set_device(f'cuda:{self.pp_rank}')
+        torch.cuda.set_device(f'cuda:{self.local_rank}')
         
         self.comm.init()