-
Notifications
You must be signed in to change notification settings - Fork 423
Expand file tree
/
Copy pathconfig_claude.yaml
More file actions
200 lines (182 loc) · 4.21 KB
/
config_claude.yaml
File metadata and controls
200 lines (182 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Run identity. Other sections pull these in via ${experiment_name} and
# ${trial_name}.
experiment_name: gsm8k-grpo-claude-code
trial_name: trial0

# Global run settings.
seed: 1
enable_offload: false
total_train_epochs: 10
# Interpolated from actor.path, so the tokenizer follows the actor model.
tokenizer_path: ${actor.path}

# Dotted path of the workflow; eval_workflow interpolates the same value.
workflow: areal.workflow.anthropic.claude_math_agent.MathToolAgent
eval_workflow: ${workflow}
max_turns: 10
# Cluster size and filesystem roots. `fileroot` is referenced elsewhere in this
# file as ${cluster.fileroot}.
cluster:
  n_nodes: 1
  n_gpus_per_node: 8
  fileroot: /tmp/areal/experiments
  name_resolve:
    type: nfs
    nfs_record_root: /tmp/areal/name_resolve
# Scheduler selection; `type` is explicitly null here.
scheduler:
  type: null
# Rollout (inference) engine settings. Identity, paths, batch size and
# scheduling are interpolated from other sections of this file.
rollout:
  backend: "sglang:d4p1t1"
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  tokenizer_path: ${tokenizer_path}
  max_concurrent_rollouts: 64
  queue_size: null
  consumer_batch_size: ${train_dataset.batch_size}
  max_head_offpolicyness: 2
  enable_rollout_tracing: false
  scheduling_spec: ${actor.scheduling_spec}
  dump_to_file: true
  # NOTE(review): `openai` is nested under `rollout` here; nothing in this
  # file interpolates it, so confirm the nesting against the config schema.
  openai:
    mode: inline
    tool_call_parser: qwen25
    reasoning_parser: qwen3
    export_style: individual
    turn_discount: 1.0
# Generation settings. `temperature` and `max_new_tokens` are re-used by the
# actor section via ${gconfig.temperature} and ${gconfig.max_new_tokens}.
gconfig:
  n_samples: 4
  min_new_tokens: 0
  max_new_tokens: 1024
  max_tokens: 2048
  greedy: false
  temperature: 1.0
# Actor (trained model) settings. `path`, `dtype`, `backend` and
# `scheduling_spec` are interpolation targets for other sections of this file.
actor:
  backend: "fsdp:d4p1t1"
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  path: Qwen/Qwen2.5-1.5B-Instruct
  init_from_scratch: false
  disable_dropout: true
  gradient_checkpointing: true
  dtype: bfloat16
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer:
    type: adam
    lr: 1.70e-5
    weight_decay: 0.017
    beta1: 0.9
    beta2: 0.999
    # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
    # resolve a bare `1e-8` as a string, not a float.
    eps: 1.0e-8
    lr_scheduler_type: constant
    gradient_clipping: 1.0
    warmup_steps_proportion: 0.001
  eps_clip: 0.4
  # Kept in sync with the generation settings through interpolation.
  temperature: ${gconfig.temperature}
  reward_scaling: 10.0
  reward_bias: -0.5
  kl_ctl: 0.0
  ppo_n_minibatches: 1
  recompute_logprob: true
  use_decoupled_loss: true
  behave_imp_weight_cap: 5.0
  reward_norm: null
  adv_norm:
    mean_level: batch
    std_level: batch
  weight_update_mode: xccl
  max_new_tokens: ${gconfig.max_new_tokens}
  # Single worker task spec; also referenced as ${actor.scheduling_spec} by
  # the rollout and ref sections.
  scheduling_spec:
    - task_type: worker
      port_count: 2
      gpu: 1
      cmd: python3 -m areal.infra.rpc.rpc_server
      # All values are quoted so they stay strings after YAML parsing.
      env_vars:
        NCCL_DEBUG: "WARN"
        NCCL_IB_DISABLE: "0"
        NCCL_SOCKET_IFNAME: "bond0"
        NCCL_NET: "IB"
        NCCL_NET_PLUGIN: ""
        NCCL_IB_GID_INDEX: "3"
        NCCL_IB_TIMEOUT: "22"
        NCCL_IB_RETRY_CNT: "7"
        NCCL_IB_SL: "5"
        NCCL_IB_TC: "136"
        NCCL_IB_HCA: "mlx5_bond"
        NCCL_IB_QPS_PER_CONNECTION: "8"
        NCCL_SET_THREAD_NAME: "1"
        # NOTE(review): NPU-named variable alongside NCCL settings; confirm it
        # is intended for this deployment.
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
# Reference model. Backend, path, dtype and scheduling spec are interpolated
# from the actor section; no optimizer is configured.
ref:
  backend: ${actor.backend}
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  path: ${actor.path}
  init_from_scratch: false
  disable_dropout: true
  dtype: ${actor.dtype}
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer: null
  scheduling_strategy:
    type: colocation
    target: actor
  scheduling_spec: ${actor.scheduling_spec}
# SGLang server settings. Model path, seed and dtype are interpolated from the
# actor section and the global seed.
sglang:
  model_path: ${actor.path}
  random_seed: ${seed}
  skip_tokenizer_init: true
  dtype: ${actor.dtype}
  max_running_requests: null
  context_length: 32768
  mem_fraction_static: 0.8
# vLLM server settings. Model, seed and dtype are interpolated from the actor
# section and the global seed.
vllm:
  model: ${actor.path}
  seed: ${seed}
  skip_tokenizer_init: false
  dtype: ${actor.dtype}
  max_model_len: 32768
  gpu_memory_utilization: 0.8
# Datasets
# Training split. `batch_size` is also consumed by the rollout section as
# ${train_dataset.batch_size}.
train_dataset:
  batch_size: 64
  shuffle: true
  pin_memory: true
  num_workers: 4
  path: openai/gsm8k
  type: rl
  max_length: 1024
# Validation split; same dataset path and type as the training split.
valid_dataset:
  batch_size: 64
  pin_memory: true
  num_workers: 4
  path: openai/gsm8k
  type: rl
# Utilities
# Saver: only the per-epoch frequency is set; step and time frequencies are
# null.
saver:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: null
# Recovery settings; `mode` is disabled in this config.
recover:
  mode: disabled
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: 3600
# Evaluator: only the per-epoch frequency is set; step and time frequencies
# are null.
evaluator:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: null
# Stats logging; the wandb sub-section has its mode set to disabled.
stats_logger:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  wandb:
    mode: disabled
# Performance tracing; disabled in this config.
perf_tracer:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  enabled: false
  # NOTE(review): `session_tracer` is nested under `perf_tracer` here; confirm
  # the nesting against the config schema.
  session_tracer:
    enabled: false