@@ -9,13 +9,13 @@ def main(env_id, seed):
99 # ==============================================================
1010 # beginning of the most frequently changed config specified by the user
1111 # ==============================================================
12- collector_env_num = 8
13- num_segments = 8
14- evaluator_env_num = 3
12+ # collector_env_num = 8
13+ # num_segments = 8
14+ # evaluator_env_num = 3
1515
16- # collector_env_num = 1
17- # num_segments = 1
18- # evaluator_env_num = 1
16+ collector_env_num = 1
17+ num_segments = 1
18+ evaluator_env_num = 1
1919
2020 num_simulations = 50
2121 collect_num_simulations = 25
@@ -25,6 +25,8 @@ def main(env_id, seed):
2525 max_env_step = int (50e6 )
2626 batch_size = 256
2727 # batch_size = 64 # debug
28+ # batch_size = 4 # debug
29+
2830 num_layers = 2
2931 # replay_ratio = 0.25
3032 replay_ratio = 0.1
@@ -33,6 +35,10 @@ def main(env_id, seed):
3335 num_unroll_steps = 10
3436 infer_context_length = 4
3537
38+ # game_segment_length = 40
39+ # num_unroll_steps = 20
40+ # infer_context_length = 8
41+
3642 # game_segment_length = 200
3743 # num_unroll_steps = 16
3844 # infer_context_length = 8
@@ -93,6 +99,8 @@ def main(env_id, seed):
9399 norm_type = norm_type ,
94100 num_res_blocks = 2 ,
95101 num_channels = 128 ,
102+ # num_res_blocks=1, # TODO
103+ # num_channels=64,
96104 support_size = 601 ,
97105 policy_entropy_weight = 5e-3 ,
98106 # policy_entropy_weight=5e-2, # TODO(pu)
@@ -125,6 +133,13 @@ def main(env_id, seed):
125133 # final_norm_option_in_encoder="SimNorm",
126134 # final_norm_option_in_obs_head="SimNorm",
127135 # predict_latent_loss_type='group_kl',
136+
137+ # weight_decay=1e-2,
138+ # latent_norm_loss=True,
139+
140+ latent_norm_loss = False ,
141+ weight_decay = 1e-4 , # TODO
142+
128143 ),
129144 ),
130145 # gradient_scale=True, #TODO
@@ -160,8 +175,8 @@ def main(env_id, seed):
160175 grad_clip_value = 5 ,
161176 replay_buffer_size = int (1e6 ),
162177 # eval_freq=int(5e3),
163- # eval_freq=int(1e4),
164- eval_freq = int (2e4 ),
178+ eval_freq = int (1e4 ), # TODO
179+ # eval_freq=int(2e4),
165180 collector_env_num = collector_env_num ,
166181 evaluator_env_num = evaluator_env_num ,
167182 # ============= The key different params for reanalyze =============
@@ -193,8 +208,10 @@ def main(env_id, seed):
193208 # ============ use muzero_segment_collector instead of muzero_collector =============
194209 from lzero .entry import train_unizero_segment
195210
196- main_config .exp_name = f'data_unizero_longrun_20250819/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_fix-init-recur_clear20_mulossweight_spsi20_envnum{ collector_env_num } _encoder-head-ln_soft-target-005_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _c25_seed{ seed } '
211+ # main_config.exp_name = f'data_unizero_longrun_20250827/{env_id[:-14]}/{env_id[:-14]}_uz_wd1e-2_fix-init-recur_clear{game_segment_length}_originlossweight_spsi{game_segment_length}_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
212+ main_config .exp_name = f'data_unizero_longrun_20250827/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_lnlw001_fix-init-recur_clear{ game_segment_length } _originlossweight_spsi{ game_segment_length } _envnum{ collector_env_num } _encoder-head-ln_soft-target-005_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _c25_seed{ seed } '
197213
214+ # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear{game_segment_length}_mulossweight_spsi{game_segment_length}_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
198215
199216 # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear20_origlossweight_spsi20_envnum{collector_env_num}_encoder-head-l2norm_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
200217
@@ -256,7 +273,7 @@ def main(env_id, seed):
256273 main (args .env , args .seed )
257274
258275 """
259- export CUDA_VISIBLE_DEVICES=0
276+ export CUDA_VISIBLE_DEVICES=6
260277 cd /fs-computility/niuyazhe/puyuan/code/LightZero
261278 python /fs-computility/niuyazhe/puyuan/code/LightZero/zoo/atari/config/atari_unizero_segment_config.py
262279 """
0 commit comments