fix(pu): fix all_reduce/all_gather None-handling bug in first LLM training

puyuan1996 · puyuan1996 · commit 2c402e16d9ae · 2026-02-08T21:24:03.000+08:00
diff --git a/zoo/jericho/priorzero/priorzero_config.py b/zoo/jericho/priorzero/priorzero_config.py
@@ -21,14 +21,17 @@
         "description": "Qwen2.5-1.5B-Instruct (balanced performance)",
     },
     "qwen2.5-3b": {
-        "model_name_or_path": "/mnt/afs/niuyazhe/workspace/xiongjyu/models/Qwen2.5-3B-Instruct",
+        # "model_name_or_path": "/mnt/afs/niuyazhe/workspace/xiongjyu/models/Qwen2.5-3B-Instruct",
+        "model_name_or_path": "/mnt/shared-storage-user/puyuan/xiongjyu/models/Qwen2.5-3B-Instruct",
         "vllm_tensor_parallel_size": 1,
         "gpu_memory_utilization": 0.25,
         "description": "Qwen2.5-3B-Instruct (better quality)",
     },
     "qwen2.5-7b": {
-        "model_name_or_path": "/mnt/shared-storage-user/puyuan/model/Qwen2.5-7B-Instruct",
-        "vllm_tensor_parallel_size": 2,
+        # "model_name_or_path": "/mnt/shared-storage-user/puyuan/model/Qwen2.5-7B-Instruct",
+        "model_name_or_path": "/mnt/shared-storage-user/puyuan/xiongjyu/models/Qwen2.5-7B-Instruct",
+        # "vllm_tensor_parallel_size": 2,
+          "vllm_tensor_parallel_size": 1,
         "gpu_memory_utilization": 0.35,
         "description": "Qwen2.5-7B-Instruct (high quality, needs 2+ GPUs)",
     },
@@ -127,7 +130,8 @@ class PriorZeroLLMConfig:
     reward_func: Optional[EasyDict] = field(default_factory=lambda: EasyDict({
         "format_reward": True,
         "format_param": EasyDict(
-            {"format_weight": 0.1, }
+            # {"format_weight": 0.1, }
+            {"format_weight": 0.5, }
         ),
     }))
     # advantage = target_value - pred_value 
@@ -185,7 +189,8 @@ def get_priorzero_config(
     action_space_size, max_steps = env_configurations.get(env_id, (20, 100))
     wm_encoder_option = 'legacy' 
     # wm_model_name = 'BAAI/bge-base-en-v1.5'  
-    wm_model_name = '/mnt/afs/niuyazhe/workspace/xiongjyu/models/bge-base-en-v1.5'  
+    # wm_model_name = '/mnt/afs/niuyazhe/workspace/xiongjyu/models/bge-base-en-v1.5'  
+    wm_model_name = '/mnt/shared-storage-user/puyuan/xiongjyu/models/bge-base-en-v1.5' 
     
     collector_env_num = 1
     evaluator_env_num = 2
@@ -208,8 +213,8 @@ def get_priorzero_config(
         observation_shape=512,  
         env_id=env_id,
         # game_path=f"/mnt/afs/wanzunian/niuyazhe/xiongjyu/jericho/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
-        game_path=f"/mnt/afs/niuyazhe/workspace/xiongjyu/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
-        # game_path=f"/mnt/shared-storage-user/puyuan/code/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
+        # game_path=f"/mnt/afs/niuyazhe/workspace/xiongjyu/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
+        game_path=f"/mnt/shared-storage-user/puyuan/code/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
         for_unizero=True,
         tokenizer_path=wm_model_name,
         max_action_num=action_space_size,
diff --git a/zoo/jericho/priorzero/strategy/deepspeed.py b/zoo/jericho/priorzero/strategy/deepspeed.py
@@ -538,6 +538,11 @@ def all_reduce(self, data, op="mean"):
                 ret[k] = self.all_reduce(v, op)
             return ret
         else:
+            # [FIX] Return None as-is instead of trying to wrap it in a tensor below.
+            # Some metrics (e.g., entropy, fmt_rewards) may be None when disabled.
+            if data is None:
+                return None
+
             is_tensor = True
             if not isinstance(data, torch.Tensor):
                 data = torch.Tensor([data])
@@ -560,6 +565,10 @@ def all_gather(self, data):
                 ret[k] = self.all_gather(v)
             return ret
         else:
+            # [FIX] Return None as-is instead of trying to wrap it in a tensor below.
+            if data is None:
+                return None
+
             if not isinstance(data, torch.Tensor):
                 data = torch.Tensor([data])
             is_cpu_tensor = data.device.type == "cpu"