@@ -9,25 +9,25 @@ def main(env_id, seed):
99 # ==============================================================
1010 # Beginning of the most frequently changed config specified by the user
1111 # ==============================================================
12- # collector_env_num = 8
13- # num_segments = 8
14- # evaluator_env_num = 3
12+ collector_env_num = 8
13+ num_segments = 8
14+ evaluator_env_num = 3
1515
16- collector_env_num = 1
17- num_segments = 1
18- evaluator_env_num = 1
16+ # collector_env_num = 1
17+ # num_segments = 1
18+ # evaluator_env_num = 1
1919
2020 num_simulations = 50
2121 collect_num_simulations = 25
2222 # collect_num_simulations = 50
2323 eval_num_simulations = 50
2424 # max_env_step = int(5e5)
2525 max_env_step = int (50e6 )
26- # batch_size = 256
27- batch_size = 64 # debug
26+ batch_size = 256
27+ # batch_size = 64 # debug
2828 num_layers = 2
29- replay_ratio = 0.25
30- # replay_ratio = 0.1
29+ # replay_ratio = 0.25
30+ replay_ratio = 0.1
3131
3232 game_segment_length = 20
3333 num_unroll_steps = 10
@@ -114,14 +114,14 @@ def main(env_id, seed):
114114 # final_norm_option_in_obs_head="LayerNorm",
115115 # predict_latent_loss_type='mse',
116116
117- final_norm_option_in_encoder = 'L2Norm' ,
118- final_norm_option_in_obs_head = "L2Norm" ,
119- predict_latent_loss_type = 'mse' ,
120-
121- # final_norm_option_in_encoder="LayerNorm",
122- # final_norm_option_in_obs_head="LayerNorm",
117+ # final_norm_option_in_encoder='L2Norm',
118+ # final_norm_option_in_obs_head="L2Norm",
123119 # predict_latent_loss_type='mse',
124120
121+ final_norm_option_in_encoder = "LayerNorm" ,
122+ final_norm_option_in_obs_head = "LayerNorm" ,
123+ predict_latent_loss_type = 'mse' ,
124+
125125 # final_norm_option_in_encoder="SimNorm",
126126 # final_norm_option_in_obs_head="SimNorm",
127127 # predict_latent_loss_type='group_kl',
@@ -192,7 +192,17 @@ def main(env_id, seed):
192192
193193 # ============ use muzero_segment_collector instead of muzero_collector =============
194194 from lzero .entry import train_unizero_segment
195- main_config .exp_name = f'data_unizero_longrun_20250819/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_fix-init-recur_clear20_muzerolossweight_spsi20_envnum{ collector_env_num } _encoder-head-l2norm_soft-target-005_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _c25_seed{ seed } '
195+
196+ main_config .exp_name = f'data_unizero_longrun_20250819/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_fix-init-recur_clear20_mulossweight_spsi20_envnum{ collector_env_num } _encoder-head-ln_soft-target-005_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _c25_seed{ seed } '
197+
198+
199+ # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear20_origlossweight_spsi20_envnum{collector_env_num}_encoder-head-l2norm_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
200+
201+ # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear20_origlossweight_spsi20_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
202+
203+ # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear20_muzerolossweight_spsi20_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
204+
205+ # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear20_origlossweight_spsi20_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
196206
197207 # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_lrucache-init-recur_clear20_muzerolossweight_spsi20_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
198208
@@ -246,7 +256,7 @@ def main(env_id, seed):
246256 main (args .env , args .seed )
247257
248258 """
249- export CUDA_VISIBLE_DEVICES=4
259+ export CUDA_VISIBLE_DEVICES=0
250260 cd /fs-computility/niuyazhe/puyuan/code/LightZero
251261 python /fs-computility/niuyazhe/puyuan/code/LightZero/zoo/atari/config/atari_unizero_segment_config.py
252262 """
0 commit comments