forked from agentscope-ai/agentscope-samples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig_difficulty.yaml
More file actions
74 lines (64 loc) · 2.75 KB
/
config_difficulty.yaml
File metadata and controls
74 lines (64 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
project: "Data-Augmentation" # Project name
name: "Difficulty-Based-Selector" # Experiment name
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} # Directory to save model checkpoints
data_processor:
  experience_pipeline:
    operators:
      - name: pass_rate_calculator # Calculate average reward and pass it back to selector
buffer:
  total_epochs: 1 # Total training epochs
  explorer_input:
    taskset:
      path: "path/to/your/augmented/math_data" # Training data path
      split: "train" # Training data split
      task_selector:
        selector_type: difficulty_based # Strategy of task selection
        feature_keys: ["qwen2.5_7b_pass_rate", "qwen3_30b_pass_rate"] # Utilized pass_rate keys
        kwargs: # Hyperparameters from [BOTS](https://github.com/modelscope/Trinity-RFT/blob/main/examples/bots/README.md)
          m: 8
          lamb: 0.1
          rho: 0.1
          target_reward: 0.8
          tau: 0
          do_sample: true
    eval_tasksets:
      - name: "eval-aime24" # Evaluation data name
        path: "path/to/aime24_data" # Evaluation data path
        split: "test" # Evaluation data split
synchronizer:
  sync_style: dynamic_by_explorer # Sync triggered dynamically by explorer
  sync_method: "nccl"
  sync_interval: 4 # Sync every N steps
  sync_timeout: 7200 # Timeout for synchronization (seconds)
monitor:
  monitor_type: tensorboard # Can also use wandb, mlflow or swanlab
# The config below has been set in python file
algorithm:
  algorithm_type: multi_step_grpo # GRPO series for multi-step scenario
  repeat_times: 8 # Number of rollouts per prompt for advantage estimation
  optimizer:
    # NOTE: written as 1.0e-6 (not 1e-6) — YAML 1.1 loaders such as PyYAML
    # require a dot in the mantissa, otherwise the value loads as a string.
    lr: 1.0e-6 # Learning rate
model:
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-0.6B} # Base model path
  max_model_len: 24576 # Max context length
  max_response_tokens: 16384 # Max tokens per response
  temperature: 1.0 # Temperature of model's generation
cluster:
  node_num: 1 # Number of used nodes
  gpu_per_node: 8 # Number of GPUs every node
explorer:
  eval_interval: 20 # Evaluation every N steps
  runner_per_model: 16 # Runners per infer engine
  max_timeout: 1200 # Max timeout for each rollout (seconds)
  rollout_model:
    engine_num: 4 # Number of vLLM engines for rollout model
    tensor_parallel_size: 1 # TP size per engine for rollout model
    enable_openai_api: true # Enable OpenAI-compatible API
    enable_history: true # Enable conversation history
    enable_auto_tool_choice: true # Enable automatic tool selection
    tool_call_parser: hermes # Parser for tool calls
    reasoning_parser: deepseek_r1 # Parser for reasoning type
trainer:
  save_interval: 100 # Save checkpoint every N steps
  use_dynamic_bsz: true # Use dynamic batch size
  ulysses_sequence_parallel_size: 1 # Sequence parallel size for Ulysses