2121 "description" : "Qwen2.5-1.5B-Instruct (balanced performance)" ,
2222 },
2323 "qwen2.5-3b" : {
24- "model_name_or_path" : "/mnt/afs/niuyazhe/workspace/xiongjyu/models/Qwen2.5-3B-Instruct" ,
24+ # "model_name_or_path": "/mnt/afs/niuyazhe/workspace/xiongjyu/models/Qwen2.5-3B-Instruct",
25+ "model_name_or_path" : "/mnt/shared-storage-user/puyuan/xiongjyu/models/Qwen2.5-3B-Instruct" ,
2526 "vllm_tensor_parallel_size" : 1 ,
2627 "gpu_memory_utilization" : 0.25 ,
2728 "description" : "Qwen2.5-3B-Instruct (better quality)" ,
2829 },
2930 "qwen2.5-7b" : {
30- "model_name_or_path" : "/mnt/shared-storage-user/puyuan/model/Qwen2.5-7B-Instruct" ,
31- "vllm_tensor_parallel_size" : 2 ,
31+ # "model_name_or_path": "/mnt/shared-storage-user/puyuan/model/Qwen2.5-7B-Instruct",
32+ "model_name_or_path" : "/mnt/shared-storage-user/puyuan/xiongjyu/models/Qwen2.5-7B-Instruct" ,
33+ # "vllm_tensor_parallel_size": 2,
34+ "vllm_tensor_parallel_size" : 1 ,
3235 "gpu_memory_utilization" : 0.35 ,
3336 "description" : "Qwen2.5-7B-Instruct (high quality, needs 2+ GPUs)" ,
3437 },
@@ -127,7 +130,8 @@ class PriorZeroLLMConfig:
127130 reward_func : Optional [EasyDict ] = field (default_factory = lambda : EasyDict ({
128131 "format_reward" : True ,
129132 "format_param" : EasyDict (
130- {"format_weight" : 0.1 , }
133+ # {"format_weight": 0.1, }
134+ {"format_weight" : 0.5 , }
131135 ),
132136 }))
133137 # advantage = target_value - pred_value
@@ -185,7 +189,8 @@ def get_priorzero_config(
185189 action_space_size , max_steps = env_configurations .get (env_id , (20 , 100 ))
186190 wm_encoder_option = 'legacy'
187191 # wm_model_name = 'BAAI/bge-base-en-v1.5'
188- wm_model_name = '/mnt/afs/niuyazhe/workspace/xiongjyu/models/bge-base-en-v1.5'
192+ # wm_model_name = '/mnt/afs/niuyazhe/workspace/xiongjyu/models/bge-base-en-v1.5'
193+ wm_model_name = '/mnt/shared-storage-user/puyuan/xiongjyu/models/bge-base-en-v1.5'
189194
190195 collector_env_num = 1
191196 evaluator_env_num = 2
@@ -208,8 +213,8 @@ def get_priorzero_config(
208213 observation_shape = 512 ,
209214 env_id = env_id ,
210215 # game_path=f"/mnt/afs/wanzunian/niuyazhe/xiongjyu/jericho/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
211- game_path = f"/mnt/afs/niuyazhe/workspace/xiongjyu/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{ env_id } " ,
212- # game_path=f"/mnt/shared-storage-user/puyuan/code/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
216+ # game_path=f"/mnt/afs/niuyazhe/workspace/xiongjyu/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{env_id}",
217+ game_path = f"/mnt/shared-storage-user/puyuan/code/LightZero/zoo/jericho/envs/z-machine-games-master/jericho-game-suite/{ env_id } " ,
213218 for_unizero = True ,
214219 tokenizer_path = wm_model_name ,
215220 max_action_num = action_space_size ,
0 commit comments