@@ -11,8 +11,8 @@ def main(env_id, seed):
1111 collector_env_num = 8
1212 num_segments = 8
1313
14- # game_segment_length = 20
15- game_segment_length = 400 # TODO
14+ game_segment_length = 20
15+ # game_segment_length = 400 # TODO
1616
1717 evaluator_env_num = 3
1818 num_simulations = 50
@@ -76,10 +76,10 @@ def main(env_id, seed):
7676 reward_support_range = (- 300. , 301. , 1. ),
7777 value_support_range = (- 300. , 301. , 1. ),
7878 norm_type = norm_type ,
79- # num_res_blocks=1,
80- # num_channels=64,
81- num_res_blocks = 2 ,
82- num_channels = 128 ,
79+ num_res_blocks = 1 ,
80+ num_channels = 64 ,
81+ # num_res_blocks=2,
82+ # num_channels=128,
8383 world_model_cfg = dict (
8484 norm_type = norm_type ,
8585 final_norm_option_in_obs_head = 'LayerNorm' ,
@@ -161,8 +161,8 @@ def main(env_id, seed):
161161 # (float) 退火的结束 clip 值 (训练后期,较严格)。
162162 encoder_clip_end_value = 10.0 ,
163163 # (int) 完成从起始值到结束值的退火所需的训练迭代步数。
164- encoder_clip_anneal_steps = 400000 , # 例如,在400k次迭代后达到最终值
165- # encoder_clip_anneal_steps=100000, # 例如,在100k次迭代后达到最终值
164+ # encoder_clip_anneal_steps=400000, # 例如,在400k次迭代后达到最终值
165+ encoder_clip_anneal_steps = 100000 , # 例如,在100k次迭代后达到最终值
166166
167167 # ==================== START: label smooth ====================
168168 policy_ls_eps_start = 0.05 , #TODO============= good start in Pong and MsPacman
@@ -225,7 +225,9 @@ def main(env_id, seed):
225225
226226 # ============ use muzero_segment_collector instead of muzero_collector =============
227227 from lzero .entry import train_unizero_segment
228- main_config .exp_name = f'data_unizero_st_refactor1010/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_ch128-res2_targetentropy-alpha-100k-098-07-encoder-clip30-10-400k_label-smooth_resnet-encoder_priority_adamw-wd1e-2-encoder1-trans1-head1_ln-inner-ln_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _seed{ seed } '
228+ main_config .exp_name = f'data_unizero_st_refactor1010/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_ch64-res1_targetentropy-alpha-100k-098-07-encoder-clip30-10-100k_label-smooth_resnet-encoder_priority_adamw-wd1e-2-encoder5-trans1-head0-true_ln-inner-ln_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _seed{ seed } '
229+
230+ # main_config.exp_name = f'data_unizero_st_refactor1010/{env_id[:-14]}/{env_id[:-14]}_uz_ch128-res2_targetentropy-alpha-100k-098-07-encoder-clip30-10-400k_label-smooth_resnet-encoder_priority_adamw-wd1e-2-encoder1-trans1-head1_ln-inner-ln_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_seed{seed}'
229231 train_unizero_segment ([main_config , create_config ], seed = seed , model_path = main_config .policy .model_path , max_env_step = max_env_step )
230232
231233
@@ -262,7 +264,7 @@ def main(env_id, seed):
262264 tmux new -s uz-st-refactor-boxing
263265
264266 conda activate /mnt/nfs/zhangjinouwen/puyuan/conda_envs/lz
265- export CUDA_VISIBLE_DEVICES=4
267+ export CUDA_VISIBLE_DEVICES=6
266268 cd /mnt/nfs/zhangjinouwen/puyuan/LightZero
267269 python /mnt/nfs/zhangjinouwen/puyuan/LightZero/zoo/atari/config/atari_unizero_segment_config.py 2>&1 | tee /mnt/nfs/zhangjinouwen/puyuan/LightZero/log/20251010_uz_st_ch128-res2_fix-encoder-clip_qbert.log
268270 """