# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Agentic GRPO run configuration using the GSM8K dataset.
#
# NOTE(review): this file was reconstructed from a diff view that had lost its
# indentation. Nesting follows the blank-line stanza boundaries of the
# original; confirm it against the consumer's config schema before use.

# Settings for the base model.
model_config:
  rng_seed: 42
  model_display: false
  remat_config: 3

# Actor and rollout use the same axis names but transposed mesh shapes.
actor_model_config:
  mesh:
    shape: "(8,1)"
    axis_names: "('fsdp','tp')"

rollout_model_config:
  mesh:
    shape: "(1,8)"
    axis_names: "('fsdp','tp')"

# No mesh of its own; points at the actor via same_mesh_as.
reference_model_config:
  mesh: null
  same_mesh_as: "actor"

# Dataset selection.
data_source: "huggingface"
dataset_name: "openai/gsm8k:main"
prompt_key: "question"

# Training mode and reward wiring.
training_mode: "agentic_grpo"
num_test_batches: 100
reward_functions:
  - "tunix/cli/reward_fn/gsm8k.py"
verl_compatible: false

# Rollout backend selection.
rollout_engine: "vllm"
offload_to_cpu: false

# Sampling parameters for rollouts.
rollout_config:
  max_prompt_length: 256
  # total_generation_steps and max_tokens_to_generate carry the same value.
  total_generation_steps: 768
  max_tokens_to_generate: 768
  temperature: 0.9
  top_p: 1.0
  top_k: 50
  return_logprobs: true

# Options for the vLLM rollout engine.
vllm_config:
  hbm_utilization: 0.4
  tpu_backend_type: "jax"
  server_mode: true
  async_scheduling: true
  # NOTE(review): all three keys below are assumed to belong to kwargs
  # (they form one stanza in the original) - confirm.
  kwargs:
    kv_cache_metrics: true
    disable_log_stats: false
    enable_prefix_caching: true

chat_parser_config:
  type: "qwen"

tokenizer_config:
  tokenizer_type: "huggingface"
  add_bos: false
  add_eos: false

# GRPO hyperparameters and the system prompt.
agentic_grpo_config:
  num_iterations: 1
  beta: 0.08
  epsilon: 0.2
  system_prompt: "You are given a grade school math problem. Think step by step and respond using <reasoning>...</reasoning> followed by <answer>...</answer> with only the final numeric answer inside <answer>."