Skip to content

Commit 98b6872

Browse files
authored
Merge pull request #36 from wellkilo/feat/nextjs-tailwind-migration
feat: capture and display training error details on crash
2 parents fd3e859 + 9862bea · commit 98b6872

16 files changed

Lines changed: 473 additions & 114 deletions

config/config.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,4 +21,5 @@ use_mps: true
2121
max_grad_norm: 1.0
2222
target_kl: 0.02
2323
lr_schedule: "linear"
24+
clip_ratio_value: 0.2
2425
ray_address:

config/config_single.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,4 +21,5 @@ use_mps: true
2121
max_grad_norm: 1.0
2222
target_kl: 0.02
2323
lr_schedule: "linear"
24+
clip_ratio_value: 0.2
2425
ray_address:

config/halfcheetah.yaml

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,25 @@
11
env_name: HalfCheetah-v5
22
num_actors: 8
33
replay_buffer_capacity: 200000
4-
batch_size: 64
4+
batch_size: 256
55
gamma: 0.99
66
gae_lambda: 0.95
7-
lr: 0.0003
8-
clip_ratio: 0.2
7+
lr: 0.0001
8+
clip_ratio: 0.15
99
vf_coef: 0.5
10-
ent_coef: 0.02
10+
ent_coef: 0.005
1111
rollout_length: 2048
1212
actor_update_interval: 1
13-
learner_updates_per_iter: 4
13+
learner_updates_per_iter: 10
1414
max_iters: 3000
1515
log_interval: 10
1616
metrics_path: output/halfcheetah/metrics.csv
17-
hidden_sizes: [64, 64]
17+
hidden_sizes: [256, 256]
1818
seed: 42
1919
use_cuda: true
2020
use_mps: true
21-
max_grad_norm: 1.0
21+
max_grad_norm: 0.5
2222
target_kl: 0.02
2323
lr_schedule: "linear"
24+
clip_ratio_value: 0.2
2425
ray_address:

config/halfcheetah_single.yaml

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,25 @@
11
env_name: HalfCheetah-v5
22
num_actors: 1
33
replay_buffer_capacity: 200000
4-
batch_size: 64
4+
batch_size: 256
55
gamma: 0.99
66
gae_lambda: 0.95
7-
lr: 0.0003
8-
clip_ratio: 0.2
9-
vf_coef: 1.0
10-
ent_coef: 0.02
11-
rollout_length: 4096
7+
lr: 0.0001
8+
clip_ratio: 0.15
9+
vf_coef: 0.5
10+
ent_coef: 0.005
11+
rollout_length: 8192
1212
actor_update_interval: 1
13-
learner_updates_per_iter: 4
13+
learner_updates_per_iter: 15
1414
max_iters: 3000
1515
log_interval: 10
1616
metrics_path: output/halfcheetah/metrics_single.csv
17-
hidden_sizes: [64, 64]
17+
hidden_sizes: [256, 256]
1818
seed: 42
1919
use_cuda: true
2020
use_mps: true
21-
max_grad_norm: 1.0
21+
max_grad_norm: 0.5
2222
target_kl: 0.02
2323
lr_schedule: "linear"
24+
clip_ratio_value: 0.2
2425
ray_address:

config/walker2d.yaml

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,25 @@
11
env_name: Walker2d-v5
22
num_actors: 8
33
replay_buffer_capacity: 200000
4-
batch_size: 64
4+
batch_size: 256
55
gamma: 0.99
66
gae_lambda: 0.95
7-
lr: 0.0003
8-
clip_ratio: 0.2
7+
lr: 0.0001
8+
clip_ratio: 0.1
99
vf_coef: 0.5
10-
ent_coef: 0.02
10+
ent_coef: 0.01
1111
rollout_length: 2048
1212
actor_update_interval: 1
13-
learner_updates_per_iter: 4
13+
learner_updates_per_iter: 10
1414
max_iters: 3000
1515
log_interval: 10
1616
metrics_path: output/walker2d/metrics.csv
17-
hidden_sizes: [64, 64]
17+
hidden_sizes: [256, 256]
1818
seed: 42
1919
use_cuda: true
2020
use_mps: true
21-
max_grad_norm: 1.0
22-
target_kl: 0.02
21+
max_grad_norm: 0.5
22+
target_kl: 0.015
2323
lr_schedule: "linear"
24+
clip_ratio_value: 0.2
2425
ray_address:

config/walker2d_single.yaml

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,25 @@
11
env_name: Walker2d-v5
22
num_actors: 1
33
replay_buffer_capacity: 200000
4-
batch_size: 64
4+
batch_size: 256
55
gamma: 0.99
66
gae_lambda: 0.95
7-
lr: 0.0003
8-
clip_ratio: 0.2
9-
vf_coef: 1.0
10-
ent_coef: 0.02
11-
rollout_length: 4096
7+
lr: 0.0001
8+
clip_ratio: 0.1
9+
vf_coef: 0.5
10+
ent_coef: 0.01
11+
rollout_length: 8192
1212
actor_update_interval: 1
13-
learner_updates_per_iter: 4
13+
learner_updates_per_iter: 15
1414
max_iters: 3000
1515
log_interval: 10
1616
metrics_path: output/walker2d/metrics_single.csv
17-
hidden_sizes: [64, 64]
17+
hidden_sizes: [256, 256]
1818
seed: 42
1919
use_cuda: true
2020
use_mps: true
21-
max_grad_norm: 1.0
22-
target_kl: 0.02
21+
max_grad_norm: 0.5
22+
target_kl: 0.015
2323
lr_schedule: "linear"
24+
clip_ratio_value: 0.2
2425
ray_address:

drl/config.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,3 +52,4 @@ class Config:
5252
max_grad_norm: float = 0.5 # 梯度裁剪阈值(防止梯度爆炸)
5353
target_kl: float = 0.015 # KL 散度早停阈值(防止过度更新)
5454
lr_schedule: str = "linear" # 学习率调度:'constant' 或 'linear'
55+
clip_ratio_value: float = 0.2 # 价值函数裁剪比率(防止价值预测剧变导致 GAE 失真)

0 commit comments

Comments
 (0)