# AReaL/examples/agent_workflow/config_claude.yaml (inclusionAI/AReaL, main branch)
# Run identity. The component sections further down reuse these two values
# through ${experiment_name} and ${trial_name}.
experiment_name: 'gsm8k-grpo-claude-code'
trial_name: 'trial0'

# Global run settings. `seed` is also referenced by the sglang and vllm sections.
seed: 1
enable_offload: false
total_train_epochs: 10
# Resolves to actor.path, so the tokenizer always follows the actor model.
tokenizer_path: '${actor.path}'

# Dotted path of the agent workflow; eval_workflow resolves to the same path.
workflow: 'areal.workflow.anthropic.claude_math_agent.MathToolAgent'
eval_workflow: '${workflow}'
max_turns: 10

# Cluster layout: a single node with eight GPUs.
cluster:
  n_nodes: 1
  n_gpus_per_node: 8
  # Referenced as ${cluster.fileroot} by rollout, saver, recover, evaluator,
  # stats_logger and perf_tracer.
  fileroot: '/tmp/areal/experiments'
  name_resolve:
    type: 'nfs'
    # NOTE(review): both roots live under /tmp — confirm this location is
    # shared between hosts before raising n_nodes above 1.
    nfs_record_root: '/tmp/areal/name_resolve'


# Scheduler selection; the type is explicitly left unset (null).
scheduler:
  type: null

# Rollout (generation) settings. Identity, paths and the scheduling spec are all
# taken from other sections through interpolation.
rollout:
  # NOTE(review): "d4p1t1" presumably encodes the parallel layout (d=4, p=1,
  # t=1); with actor.backend using the same suffix that would account for all
  # 8 GPUs of the node — confirm against the backend-string parser.
  backend: 'sglang:d4p1t1'
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  fileroot: '${cluster.fileroot}'
  tokenizer_path: '${tokenizer_path}'
  max_concurrent_rollouts: 64
  queue_size: null
  # Follows train_dataset.batch_size (64 in this file).
  consumer_batch_size: '${train_dataset.batch_size}'
  max_head_offpolicyness: 2
  enable_rollout_tracing: false
  # Same worker spec (command and env vars) as the actor.
  scheduling_spec: '${actor.scheduling_spec}'
  dump_to_file: true
  openai:
    mode: 'inline'
    tool_call_parser: 'qwen25'
    reasoning_parser: 'qwen3'
    export_style: 'individual'
    turn_discount: 1.0

# Generation (sampling) settings. actor.temperature and actor.max_new_tokens
# interpolate from this section, so changes here propagate to the actor.
gconfig:
  n_samples: 4
  min_new_tokens: 0
  max_new_tokens: 1024
  max_tokens: 2048
  greedy: false
  temperature: 1.0

# Actor (the model being trained). Other sections interpolate from here:
# tokenizer_path, ref, sglang and vllm read actor.path / actor.dtype, ref reads
# actor.backend, and rollout / ref reuse actor.scheduling_spec.
actor:
  # NOTE(review): "d4p1t1" presumably encodes the parallel layout (d=4, p=1,
  # t=1) — confirm against the backend-string parser.
  backend: 'fsdp:d4p1t1'
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  path: 'Qwen/Qwen2.5-1.5B-Instruct'
  init_from_scratch: false
  disable_dropout: true
  gradient_checkpointing: true
  dtype: 'bfloat16'
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer:
    type: 'adam'
    lr: 1.70e-5
    weight_decay: 0.017
    beta1: 0.9
    beta2: 0.999
    # Written with an explicit mantissa dot: YAML 1.1 resolvers (e.g. plain
    # PyYAML) load a bare `1e-8` as a string instead of a float.
    eps: 1.0e-8
    lr_scheduler_type: 'constant'
    gradient_clipping: 1.0
    warmup_steps_proportion: 0.001
  eps_clip: 0.4
  # Kept equal to the sampling temperature used for generation.
  temperature: '${gconfig.temperature}'
  reward_scaling: 10.0
  reward_bias: -0.5
  kl_ctl: 0.0
  ppo_n_minibatches: 1
  recompute_logprob: true
  use_decoupled_loss: true
  behave_imp_weight_cap: 5.0
  reward_norm: null
  adv_norm:
    mean_level: 'batch'
    std_level: 'batch'
  weight_update_mode: 'xccl'
  max_new_tokens: '${gconfig.max_new_tokens}'
  # Worker launch spec; rollout.scheduling_spec and ref.scheduling_spec point
  # at this same list.
  scheduling_spec:
    - task_type: 'worker'
      port_count: 2
      gpu: 1
      cmd: 'python3 -m areal.infra.rpc.rpc_server'
      # Every value is quoted so it stays a string; unquoted, entries such as
      # 0, 3, 22 or 136 would load as integers.
      env_vars:
        NCCL_DEBUG: 'WARN'
        NCCL_IB_DISABLE: '0'
        # NOTE(review): the interface name here and the HCA name below look
        # site-specific — confirm they exist on the target hosts.
        NCCL_SOCKET_IFNAME: 'bond0'
        NCCL_NET: 'IB'
        NCCL_NET_PLUGIN: ''
        NCCL_IB_GID_INDEX: '3'
        NCCL_IB_TIMEOUT: '22'
        NCCL_IB_RETRY_CNT: '7'
        NCCL_IB_SL: '5'
        NCCL_IB_TC: '136'
        NCCL_IB_HCA: 'mlx5_bond'
        NCCL_IB_QPS_PER_CONNECTION: '8'
        NCCL_SET_THREAD_NAME: '1'
        # NOTE(review): this is the NPU allocator variable, while the rest of
        # the block configures NCCL workers with `gpu: 1` — confirm whether
        # PYTORCH_CUDA_ALLOC_CONF was intended.
        PYTORCH_NPU_ALLOC_CONF: 'expandable_segments:True'

# Reference model. Backend, checkpoint path, dtype and scheduling spec all
# mirror the actor; no optimizer is configured (null).
ref:
  backend: '${actor.backend}'
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  path: '${actor.path}'
  init_from_scratch: false
  disable_dropout: true
  dtype: '${actor.dtype}'
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer: null
  # Names the actor as the colocation target.
  scheduling_strategy:
    type: 'colocation'
    target: 'actor'
  scheduling_spec: '${actor.scheduling_spec}'

# SGLang server options. Model path, dtype and random seed follow actor.path,
# actor.dtype and the top-level seed.
sglang:
  model_path: '${actor.path}'
  random_seed: '${seed}'
  skip_tokenizer_init: true
  dtype: '${actor.dtype}'
  max_running_requests: null
  context_length: 32768
  mem_fraction_static: 0.8

# vLLM server options, wired to the same actor.path / actor.dtype / seed values
# as the sglang section.
# NOTE(review): rollout.backend in this file selects "sglang:..."; presumably
# this section only takes effect when the backend is switched — confirm.
vllm:
  model: '${actor.path}'
  seed: '${seed}'
  # NOTE(review): false here but true under sglang — confirm this is intended.
  skip_tokenizer_init: false
  dtype: '${actor.dtype}'
  max_model_len: 32768
  gpu_memory_utilization: 0.8

# Datasets
# Training split. batch_size is also consumed by rollout.consumer_batch_size.
train_dataset:
  batch_size: 64
  shuffle: true
  pin_memory: true
  num_workers: 4
  path: 'openai/gsm8k'
  type: 'rl'
  max_length: 1024

# Validation split, same dataset path and type as train_dataset.
# NOTE(review): `shuffle` and `max_length` are not set here, unlike
# train_dataset; presumably the schema defaults apply — confirm.
valid_dataset:
  batch_size: 64
  pin_memory: true
  num_workers: 4
  path: 'openai/gsm8k'
  type: 'rl'

# Utilities
# Checkpoint saver: only the per-epoch frequency is set; the step- and
# time-based frequencies are null.
saver:
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  fileroot: '${cluster.fileroot}'
  freq_epochs: 1
  freq_steps: null
  freq_secs: null

# Recovery settings; mode is `disabled` in this config.
# NOTE(review): the frequencies below presumably only matter once mode is
# changed from `disabled` — confirm.
recover:
  mode: 'disabled'
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  fileroot: '${cluster.fileroot}'
  freq_epochs: 1
  freq_steps: null
  freq_secs: 3600

# Evaluator: per-epoch frequency of 1; step- and time-based frequencies are null.
evaluator:
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  fileroot: '${cluster.fileroot}'
  freq_epochs: 1
  freq_steps: null
  freq_secs: null

# Stats logging; the wandb mode is set to `disabled`.
stats_logger:
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  fileroot: '${cluster.fileroot}'
  wandb:
    mode: 'disabled'

# Performance tracing; both the tracer and its session tracer are switched off.
perf_tracer:
  experiment_name: '${experiment_name}'
  trial_name: '${trial_name}'
  fileroot: '${cluster.fileroot}'
  enabled: false
  session_tracer:
    enabled: false