# config_benchmark_datatrove.yaml
# MMIRAGE — DataTrove-compatible throughput benchmark
#
# Mirrors the conditions used in the DataTrove inference benchmark
# (https://github.com/huggingface/datatrove/tree/main/examples/inference/benchmark):
#
# dataset : simplescaling/s1K-1.1 (train split, 1 000 samples)
# prompt : raw `question` field, no system prompt
# output : up to 1 024 tokens per sample
# context : 2 048-token model max context
# model : Qwen/Qwen3-4B (DataTrove baseline: tp=1 on a single GPU)
#
# Download the dataset before running:
#
# python -c "
# from datasets import load_dataset
# ds = load_dataset('simplescaling/s1K-1.1', split='train')
# ds.save_to_disk('data/s1K-1.1')
# "
#
# Then run with stats collection enabled:
#
# mmirage run --config configs/config_benchmark_datatrove.yaml --stats
#
# Inspect results:
#
# mmirage stats --config configs/config_benchmark_datatrove.yaml
# Processor definitions. A single LLM processor is configured here.
processors:
  - type: llm
    # Model/server settings for the inference engine.
    # NOTE(review): the file header lists a 2 048-token max context, but no
    # context-length setting is present in this section — confirm whether one
    # is needed to match the DataTrove conditions.
    server_args:
      model_path: Qwen/Qwen3-4B  # same model family as DataTrove baseline
      tp_size: 1  # DataTrove baseline: tp=1
      trust_remote_code: true
      disable_custom_all_reduce: true
    # SGLang engine tuning — equivalents of DataTrove's vLLM mns/mnbt knobs
    # NOTE(review): nesting was reconstructed from a flattened copy; confirm
    # whether extra_engine_args belongs beside server_args (as written here)
    # or inside it.
    extra_engine_args:
      max_running_requests: 1000
    # Sampling defaults: temperature 0.0, generation capped at 1024 new tokens.
    default_sampling_params:
      temperature: 0.0
      max_new_tokens: 1024  # DataTrove: max-tokens=1024
# Dataset loading and sharding settings.
loading_params:
  state_dir: data/benchmark_s1k/_pipeline_state
  datasets:
    - path: data/s1K-1.1  # save_to_disk() target above
      type: loadable
      output_dir: data/benchmark_s1k/output
  num_shards: 1
  # Literal "$SLURM_ARRAY_TASK_ID"; presumably expanded from the environment
  # by the loader — verify against the config reader.
  shard_id: "$SLURM_ARRAY_TASK_ID"
  batch_size: 1000
# Column mapping: which dataset fields feed the prompt, what the LLM
# produces, and the shape of each emitted record.
processing_params:
  inputs:
    - name: question
      key: question  # DataTrove: prompt-column=question
  outputs:
    - name: answer
      type: llm
      output_type: plain
      # Qwen3 thinking is disabled by embedding an empty <think> block in the
      # prompt. The string below reproduces what the Qwen3 chat template emits
      # for a single user turn with enable_thinking=False: no newline between
      # the user content and <|im_end|>, and two newlines after </think>.
      # This avoids any dependency on SGLang sampling-param support for that
      # flag.
      prompt: "<|im_start|>user\n{{ question }}<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
  remove_columns: false
  # Each output record carries the original question and the generated answer.
  output_schema:
    question: "{{ question }}"
    answer: "{{ answer }}"
# Job execution settings (Slurm mode).
# NOTE(review): nesting was reconstructed from a flattened copy; all keys are
# assumed to be direct children of execution_params — confirm.
execution_params:
  mode: slurm
  retry: false
  merge: false
  max_retries: 3
  account: a127
  job_name: mmirage-sharded
  # Per-job resource request.
  nodes: 1
  ntasks_per_node: 1
  gpus: 4
  cpus_per_task: 288
  time_limit: "11:59:59"  # quoted so it stays a string, not a sexagesimal int
  # Paths contain a literal ${USER}; presumably expanded by the launcher —
  # verify against the config reader.
  report_dir: "/users/${USER}/reports"
  hf_home: "/capstor/store/cscs/swissai/a127/homes/${USER}/hf"
  edf_env: "/users/${USER}/.edf/mmirage.toml"
  poll_interval_seconds: 30
  settle_time_seconds: 60