File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -21,4 +21,5 @@ use_mps: true
2121max_grad_norm : 1.0
2222target_kl : 0.02
2323lr_schedule : "linear"
24+ clip_ratio_value : 0.2
2425ray_address :
Original file line number Diff line number Diff line change @@ -21,4 +21,5 @@ use_mps: true
2121max_grad_norm : 1.0
2222target_kl : 0.02
2323lr_schedule : "linear"
24+ clip_ratio_value : 0.2
2425ray_address :
Original file line number Diff line number Diff line change 11env_name : HalfCheetah-v5
22num_actors : 8
33replay_buffer_capacity : 200000
4- batch_size : 64
4+ batch_size : 256
55gamma : 0.99
66gae_lambda : 0.95
7- lr : 0.0003
8- clip_ratio : 0.2
7+ lr : 0.0001
8+ clip_ratio : 0.15
99vf_coef : 0.5
10- ent_coef : 0.02
10+ ent_coef : 0.005
1111rollout_length : 2048
1212actor_update_interval : 1
13- learner_updates_per_iter : 4
13+ learner_updates_per_iter : 10
1414max_iters : 3000
1515log_interval : 10
1616metrics_path : output/halfcheetah/metrics.csv
17- hidden_sizes : [64, 64 ]
17+ hidden_sizes : [256, 256 ]
1818seed : 42
1919use_cuda : true
2020use_mps : true
21- max_grad_norm : 1.0
21+ max_grad_norm : 0.5
2222target_kl : 0.02
2323lr_schedule : "linear"
24+ clip_ratio_value : 0.2
2425ray_address:
Original file line number Diff line number Diff line change 11env_name : HalfCheetah-v5
22num_actors : 1
33replay_buffer_capacity : 200000
4- batch_size : 64
4+ batch_size : 256
55gamma : 0.99
66gae_lambda : 0.95
7- lr : 0.0003
8- clip_ratio : 0.2
9- vf_coef : 1.0
10- ent_coef : 0.02
11- rollout_length : 4096
7+ lr : 0.0001
8+ clip_ratio : 0.15
9+ vf_coef : 0.5
10+ ent_coef : 0.005
11+ rollout_length : 8192
1212actor_update_interval : 1
13- learner_updates_per_iter : 4
13+ learner_updates_per_iter : 15
1414max_iters : 3000
1515log_interval : 10
1616metrics_path : output/halfcheetah/metrics_single.csv
17- hidden_sizes : [64, 64 ]
17+ hidden_sizes : [256, 256 ]
1818seed : 42
1919use_cuda : true
2020use_mps : true
21- max_grad_norm : 1.0
21+ max_grad_norm : 0.5
2222target_kl : 0.02
2323lr_schedule : "linear"
24+ clip_ratio_value : 0.2
2425ray_address:
Original file line number Diff line number Diff line change 11env_name : Walker2d-v5
22num_actors : 8
33replay_buffer_capacity : 200000
4- batch_size : 64
4+ batch_size : 256
55gamma : 0.99
66gae_lambda : 0.95
7- lr : 0.0003
8- clip_ratio : 0.2
7+ lr : 0.0001
8+ clip_ratio : 0.1
99vf_coef : 0.5
10- ent_coef : 0.02
10+ ent_coef : 0.01
1111rollout_length : 2048
1212actor_update_interval : 1
13- learner_updates_per_iter : 4
13+ learner_updates_per_iter : 10
1414max_iters : 3000
1515log_interval : 10
1616metrics_path : output/walker2d/metrics.csv
17- hidden_sizes : [64, 64 ]
17+ hidden_sizes : [256, 256 ]
1818seed : 42
1919use_cuda : true
2020use_mps : true
21- max_grad_norm : 1.0
22- target_kl : 0.02
21+ max_grad_norm : 0.5
22+ target_kl : 0.015
2323lr_schedule : "linear"
24+ clip_ratio_value : 0.2
2425ray_address:
Original file line number Diff line number Diff line change 11env_name : Walker2d-v5
22num_actors : 1
33replay_buffer_capacity : 200000
4- batch_size : 64
4+ batch_size : 256
55gamma : 0.99
66gae_lambda : 0.95
7- lr : 0.0003
8- clip_ratio : 0.2
9- vf_coef : 1.0
10- ent_coef : 0.02
11- rollout_length : 4096
7+ lr : 0.0001
8+ clip_ratio : 0.1
9+ vf_coef : 0.5
10+ ent_coef : 0.01
11+ rollout_length : 8192
1212actor_update_interval : 1
13- learner_updates_per_iter : 4
13+ learner_updates_per_iter : 15
1414max_iters : 3000
1515log_interval : 10
1616metrics_path : output/walker2d/metrics_single.csv
17- hidden_sizes : [64, 64 ]
17+ hidden_sizes : [256, 256 ]
1818seed : 42
1919use_cuda : true
2020use_mps : true
21- max_grad_norm : 1.0
22- target_kl : 0.02
21+ max_grad_norm : 0.5
22+ target_kl : 0.015
2323lr_schedule : "linear"
24+ clip_ratio_value : 0.2
2425ray_address:
Original file line number Diff line number Diff line change @@ -52,3 +52,4 @@ class Config:
5252 max_grad_norm : float = 0.5 # 梯度裁剪阈值(防止梯度爆炸)
5353 target_kl : float = 0.015 # KL 散度早停阈值(防止过度更新)
5454 lr_schedule : str = "linear" # 学习率调度:'constant' 或 'linear'
55+ clip_ratio_value : float = 0.2 # 价值函数裁剪比率(防止价值预测剧变导致 GAE 失真)
You can’t perform that action at this time.
0 commit comments