@@ -73,6 +73,10 @@ def main(env_id, seed):
7373 reward_support_range = (- 300. , 301. , 1. ),
7474 value_support_range = (- 300. , 301. , 1. ),
7575 norm_type = norm_type ,
76+ # num_res_blocks=1,
77+ # num_channels=64,
78+ num_res_blocks = 2 ,
79+ num_channels = 128 ,
7680 world_model_cfg = dict (
7781 norm_type = norm_type ,
7882 final_norm_option_in_obs_head = 'LayerNorm' ,
@@ -138,10 +142,10 @@ def main(env_id, seed):
138142 # adaptive_entropy_alpha_lr=1e-3,
139143 target_entropy_start_ratio = 0.98 ,
140144 # target_entropy_end_ratio =0.9,
141- target_entropy_end_ratio = 0.7 ,
142- target_entropy_decay_steps = 100000 , # 例如,在100k次迭代后达到最终值 需要与replay ratio协同调整
143- # target_entropy_end_ratio =0.5, # TODO=====
144- # target_entropy_decay_steps = 400000, # 例如,在100k次迭代后达到最终值 需要与replay ratio协同调整
145+ # target_entropy_end_ratio =0.7,
146+ # target_entropy_decay_steps = 100000, # 例如,在100k次迭代后达到最终值 需要与replay ratio协同调整
147+ target_entropy_end_ratio = 0.5 , # TODO=====
148+ target_entropy_decay_steps = 400000 , # e.g., the final value is reached after 400k iterations; must be tuned jointly with the replay ratio
145149
146150
147151 # ==================== START: Encoder-Clip Annealing Config ====================
@@ -217,7 +221,7 @@ def main(env_id, seed):
217221
218222 # ============ use muzero_segment_collector instead of muzero_collector =============
219223 from lzero .entry import train_unizero_segment
220- main_config .exp_name = f'data_unizero_st_refactor1010/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_targetentropy- alpha-100k -098-07 -encoder-clip30-10-100k_label -smooth_resnet-encoder_priority_adamw-wd1e-2-encoder1-trans1-head1_ln-inner-ln_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _seed{ seed } '
224+ main_config .exp_name = f'data_unizero_st_refactor1010/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_ch128-res2_targetentropy- alpha-400k -098-05 -encoder-clip30-10-100k-true_label -smooth_resnet-encoder_priority_adamw-wd1e-2-encoder1-trans1-head1_ln-inner-ln_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _seed{ seed } '
221225 train_unizero_segment ([main_config , create_config ], seed = seed , model_path = main_config .policy .model_path , max_env_step = max_env_step )
222226
223227
@@ -240,21 +244,21 @@ def main(env_id, seed):
240244 # args.env = 'AlienNoFrameskip-v4'
241245
242246 # 下面是atari8以外的2个代表环境
243- # args.env = 'QbertNoFrameskip-v4' # 记忆规划型环境 稀疏奖励
247+ args .env = 'QbertNoFrameskip-v4' # 记忆规划型环境 稀疏奖励
244248 # args.env = 'SpaceInvadersNoFrameskip-v4' # 记忆规划型环境 稀疏奖励
245249
246250 # 下面是已经表现不错的
247251 # args.env = 'BoxingNoFrameskip-v4' # 反应型环境 密集奖励
248252 # args.env = 'ChopperCommandNoFrameskip-v4'
249- args .env = 'RoadRunnerNoFrameskip-v4'
253+ # args.env = 'RoadRunnerNoFrameskip-v4'
250254
251255 main (args .env , args .seed )
252256
253257 """
254258 tmux new -s uz-st-refactor-boxing
255259
256260 conda activate /mnt/nfs/zhangjinouwen/puyuan/conda_envs/lz
257- export CUDA_VISIBLE_DEVICES=5
261+ export CUDA_VISIBLE_DEVICES=1
258262 cd /mnt/nfs/zhangjinouwen/puyuan/LightZero
259- python /mnt/nfs/zhangjinouwen/puyuan/LightZero/zoo/atari/config/atari_unizero_segment_config.py 2>&1 | tee /mnt/nfs/zhangjinouwen/puyuan/LightZero/log/20251010_fix_uz_st_road .log
263+ python /mnt/nfs/zhangjinouwen/puyuan/LightZero/zoo/atari/config/atari_unizero_segment_config.py 2>&1 | tee /mnt/nfs/zhangjinouwen/puyuan/LightZero/log/20251010_uz_st_ch128-res2_fix-encoder-clip_qbert .log
260264 """
0 commit comments