Skip to content

Commit 2c402e1

Browse files
committed
fix(pu): fix all_reduce bug in first LLM training
1 parent 4567190 commit 2c402e1

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

zoo/jericho/priorzero/priorzero_config.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,17 @@
2121
"description": "Qwen2.5-1.5B-Instruct (balanced performance)",
2222
},
2323
"qwen2.5-3b": {
24-
"model_name_or_path": "/mnt/afs/niuyazhe/workspace/xiongjyu/models/Qwen2.5-3B-Instruct",
24+
# "model_name_or_path": "/mnt/afs/niuyazhe/workspace/xiongjyu/models/Qwen2.5-3B-Instruct",
25+
"model_name_or_path": "/mnt/shared-storage-user/puyuan/xiongjyu/models/Qwen2.5-3B-Instruct",
2526
"vllm_tensor_parallel_size": 1,
2627
"gpu_memory_utilization": 0.25,
2728
"description": "Qwen2.5-3B-Instruct (better quality)",
2829
},
2930
"qwen2.5-7b": {
30-
"model_name_or_path": "/mnt/shared-storage-user/puyuan/model/Qwen2.5-7B-Instruct",
31-
"vllm_tensor_parallel_size": 2,
31+
# "model_name_or_path": "/mnt/shared-storage-user/puyuan/model/Qwen2.5-7B-Instruct",
32+
"model_name_or_path": "/mnt/shared-storage-user/puyuan/xiongjyu/models/Qwen2.5-7B-Instruct",
33+
# "vllm_tensor_parallel_size": 2,
34+
"vllm_tensor_parallel_size": 1,
3235
"gpu_memory_utilization": 0.35,
3336
"description": "Qwen2.5-7B-Instruct (high quality, needs 2+ GPUs)",
3437
},
@@ -127,7 +130,8 @@ class PriorZeroLLMConfig:
127130
reward_func: Optional[EasyDict] = field(default_factory=lambda: EasyDict({
128131
"format_reward": True,
129132
"format_param": EasyDict(
130-
{"format_weight": 0.1, }
133+
# {"format_weight": 0.1, }
134+
{"format_weight": 0.5, }
131135
),
132136
}))
133137
# advantage = target_value - pred_value
@@ -185,7 +189,8 @@ def get_priorzero_config(
185189
action_space_size, max_steps = env_configurations.get(env_id, (20, 100))
186190
wm_encoder_option = 'legacy'
187191
# wm_model_name = 'BAAI/bge-base-en-v1.5'
188-
wm_model_name = '/mnt/afs/niuyazhe/workspace/xiongjyu/models/bge-base-en-v1.5'
192+
# wm_model_name = '/mnt/afs/niuyazhe/workspace/xiongjyu/models/bge-base-en-v1.5'
193+
wm_model_name = '/mnt/shared-storage-user/puyuan/xiongjyu/models/bge-base-en-v1.5'
189194

190195
collector_env_num = 1
191196
evaluator_env_num = 2
@@ -208,8 +213,8 @@ def get_priorzero_config(
208213
observation_shape=512,
209214
env_id=env_id,
210215
# game_path=f"/mnt/afs/wanzunian/niuyazhe/xiongjyu/jericho/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
211-
game_path=f"/mnt/afs/niuyazhe/workspace/xiongjyu/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
212-
# game_path=f"/mnt/shared-storage-user/puyuan/code/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
216+
# game_path=f"/mnt/afs/niuyazhe/workspace/xiongjyu/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
217+
game_path=f"/mnt/shared-storage-user/puyuan/code/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
213218
for_unizero=True,
214219
tokenizer_path=wm_model_name,
215220
max_action_num=action_space_size,

zoo/jericho/priorzero/strategy/deepspeed.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -538,6 +538,11 @@ def all_reduce(self, data, op="mean"):
538538
ret[k] = self.all_reduce(v, op)
539539
return ret
540540
else:
541+
# [FIX] Handle None values gracefully
542+
# Some metrics (e.g., entropy, fmt_rewards) may be None when disabled
543+
if data is None:
544+
return None
545+
541546
is_tensor = True
542547
if not isinstance(data, torch.Tensor):
543548
data = torch.Tensor([data])
@@ -560,6 +565,10 @@ def all_gather(self, data):
560565
ret[k] = self.all_gather(v)
561566
return ret
562567
else:
568+
# [FIX] Handle None values gracefully
569+
if data is None:
570+
return None
571+
563572
if not isinstance(data, torch.Tensor):
564573
data = torch.Tensor([data])
565574
is_cpu_tensor = data.device.type == "cpu"

0 commit comments

Comments
 (0)