Skip to content

Commit 406e1c5

Browse files
JingwenGu0829, yxs, and zhaochenyang20
authored
[BREAKING, REFACTOR] V1 Pipeline Added (#334)
Co-authored-by: yxs <xuesongyey@gmail.com> Co-authored-by: zhaochenyang20 <zhaochen20@outlook.com>
1 parent 20b9c9a commit 406e1c5

218 files changed

Lines changed: 40506 additions & 82 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/test-qwen3-omni-ci-v1.yaml

Lines changed: 462 additions & 0 deletions
Large diffs are not rendered by default.

.github/workflows/test-qwen3-omni-ci.yaml

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -14,15 +14,17 @@ permissions:
1414
contents: read
1515

1616
# DAG:
17-
# docs ──► stage-1-thinker ──► stage-2-tts
18-
# ├─► stage-3-mmmu
19-
# ├─► stage-4-mmmu-talker
20-
# ├─► stage-5-mmsu
21-
# ├─► stage-6-mmsu-talker
22-
# ├─► stage-7-videomme
23-
# ├─► stage-8-videomme-talker
24-
# ├─► stage-9-videoamme
25-
# └─► stage-10-videoamme-talker
17+
# docs ──► stage-1-thinker
18+
# ├─► stage-2-tts
19+
# ├─► stage-3-mmmu
20+
# ├─► stage-4-mmmu-talker
21+
# ├─► stage-5-mmsu
22+
# ├─► stage-6-mmsu-talker
23+
# ├─► stage-7-videomme
24+
# ├─► stage-8-videomme-talker
25+
# ├─► stage-9-videoamme
26+
# └─► stage-10-videoamme-talker
27+
# All 10 stages run in parallel after docs passes — they are independent.
2628

2729
jobs:
2830
docs:
@@ -106,7 +108,7 @@ jobs:
106108

107109
stage-2-tts:
108110
name: stage 2 - TTS speed + WER
109-
needs: [docs, stage-1-thinker]
111+
needs: docs
110112
runs-on: [self-hosted]
111113
timeout-minutes: 15
112114
container:
@@ -143,7 +145,7 @@ jobs:
143145

144146
stage-3-mmmu:
145147
name: stage 3 - MMMU accuracy + speed
146-
needs: [docs, stage-1-thinker]
148+
needs: docs
147149
runs-on: [self-hosted]
148150
timeout-minutes: 20
149151
container:
@@ -178,7 +180,7 @@ jobs:
178180

179181
stage-4-mmmu-talker:
180182
name: stage 4 - MMMU Talker
181-
needs: [docs, stage-1-thinker]
183+
needs: docs
182184
runs-on: [self-hosted]
183185
timeout-minutes: 15
184186
container:
@@ -213,7 +215,7 @@ jobs:
213215

214216
stage-5-mmsu:
215217
name: stage 5 - MMSU accuracy + speed
216-
needs: [docs, stage-1-thinker]
218+
needs: docs
217219
runs-on: [self-hosted]
218220
timeout-minutes: 20
219221
container:
@@ -249,7 +251,7 @@ jobs:
249251

250252
stage-6-mmsu-talker:
251253
name: stage 6 - MMSU Talker
252-
needs: [docs, stage-1-thinker]
254+
needs: docs
253255
runs-on: [self-hosted]
254256
timeout-minutes: 15
255257
container:
@@ -284,7 +286,7 @@ jobs:
284286

285287
stage-7-videomme:
286288
name: stage 7 - Video-MME accuracy + speed
287-
needs: [docs, stage-1-thinker]
289+
needs: docs
288290
runs-on: [self-hosted]
289291
timeout-minutes: 30
290292
container:
@@ -319,7 +321,7 @@ jobs:
319321

320322
stage-8-videomme-talker:
321323
name: stage 8 - Video-MME Talker
322-
needs: [docs, stage-1-thinker]
324+
needs: docs
323325
runs-on: [self-hosted]
324326
timeout-minutes: 30
325327
container:
@@ -354,7 +356,7 @@ jobs:
354356

355357
stage-9-videoamme:
356358
name: stage 9 - Video-AMME accuracy + speed
357-
needs: [docs, stage-1-thinker]
359+
needs: docs
358360
runs-on: [self-hosted]
359361
timeout-minutes: 30
360362
container:
@@ -389,7 +391,7 @@ jobs:
389391

390392
stage-10-videoamme-talker:
391393
name: stage 10 - Video-AMME Talker
392-
needs: [docs, stage-1-thinker]
394+
needs: docs
393395
runs-on: [self-hosted]
394396
timeout-minutes: 30
395397
container:
Lines changed: 202 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,202 @@
1+
name: S2-Pro CI (v1)
2+
3+
on:
4+
pull_request:
5+
branches: [main]
6+
types: [labeled, synchronize, reopened, ready_for_review]
7+
workflow_dispatch:
8+
9+
concurrency:
10+
group: s2pro-ci-v1-${{ github.event.pull_request.number || github.ref }}
11+
cancel-in-progress: true
12+
13+
permissions:
14+
contents: read
15+
16+
# DAG:
17+
# docs ──► stage-1-non-streaming ──┐
18+
# └─► stage-2-streaming ──────┤
19+
# └──► stage-3-consistency
20+
21+
jobs:
22+
docs:
23+
name: docs
24+
# CI gate — runs on workflow_dispatch, or on non-draft PRs carrying the `run-ci` label.
25+
# Keep in sync with the same gate in test.yaml / test-examples.yaml / test-qwen3-omni-ci.yaml.
26+
if: >-
27+
github.event_name == 'workflow_dispatch' ||
28+
(!github.event.pull_request.draft &&
29+
contains(github.event.pull_request.labels.*.name, 'run-ci'))
30+
runs-on: [self-hosted]
31+
timeout-minutes: 10
32+
container:
33+
image: frankleeeee/sglang-omni:dev
34+
options: --gpus all --rm -v /dev/shm:/dev/shm
35+
steps:
36+
- name: Checkout code
37+
uses: actions/checkout@v4
38+
39+
- uses: ./.github/actions/omni-setup
40+
with:
41+
venv-name: omni-s2pro
42+
install-deps: "true"
43+
44+
- name: Run docs tests
45+
shell: bash
46+
run: |
47+
source omni-s2pro/bin/activate
48+
export PYTHONPATH=$PWD
49+
pytest tests/docs/s2pro/test_docs_tts_s2pro.py -v -s -x
50+
env:
51+
HF_ENDPOINT: https://hf-mirror.com
52+
SGLANG_OMNI_SERVER_VERSION: v1
53+
54+
- name: Post-stage cleanup
55+
if: always()
56+
uses: ./.github/actions/omni-post-stage
57+
with:
58+
stage-label: S2-Pro docs
59+
60+
- name: Save cache
61+
if: always()
62+
shell: bash
63+
run: |
64+
if omni-s2pro/bin/python -c "import torch" 2>/dev/null; then
65+
rm -rf /github/home/omni-s2pro
66+
cp -p -r omni-s2pro /github/home/
67+
else
68+
echo "::warning::Skipping cache save — venv appears corrupted (torch not importable)"
69+
fi
70+
71+
stage-1-non-streaming:
72+
name: stage 1 - non-streaming
73+
needs: docs
74+
runs-on: [self-hosted]
75+
timeout-minutes: 5
76+
container:
77+
image: frankleeeee/sglang-omni:dev
78+
options: --gpus all --rm -v /dev/shm:/dev/shm
79+
steps:
80+
- name: Checkout code
81+
uses: actions/checkout@v4
82+
83+
- uses: ./.github/actions/omni-setup
84+
with:
85+
venv-name: omni-s2pro
86+
87+
- name: Run benchmark stage
88+
shell: bash
89+
run: |
90+
source omni-s2pro/bin/activate
91+
export PYTHONPATH=$PWD
92+
pytest tests/test_model/test_s2pro_tts_ci.py -v -s -x --concurrency 8 --s2pro-stage s2pro-stage-1-nonstream
93+
env:
94+
HF_ENDPOINT: https://hf-mirror.com
95+
SGLANG_SEEDTTS50_DIR: /github/home/seedtts-50
96+
S2PRO_STAGE_OUTPUT_ROOT: ${{ github.workspace }}/stage-results/nonstream
97+
SGLANG_OMNI_SERVER_VERSION: v1
98+
99+
- name: Post-stage cleanup
100+
if: always()
101+
uses: ./.github/actions/omni-post-stage
102+
with:
103+
stage-label: S2-Pro stage 1 (non-streaming)
104+
artifact-search-root: stage-results/nonstream
105+
artifact-path-globs: |
106+
*/speed_results.json
107+
*/wer_results.json
108+
artifact-upload-name: s2pro-stage-1-results
109+
artifact-upload-path: stage-results/nonstream
110+
artifact-if-no-files-found: error
111+
112+
stage-2-streaming:
113+
name: stage 2 - streaming
114+
needs: docs
115+
runs-on: [self-hosted]
116+
timeout-minutes: 5
117+
container:
118+
image: frankleeeee/sglang-omni:dev
119+
options: --gpus all --rm -v /dev/shm:/dev/shm
120+
steps:
121+
- name: Checkout code
122+
uses: actions/checkout@v4
123+
124+
- uses: ./.github/actions/omni-setup
125+
with:
126+
venv-name: omni-s2pro
127+
128+
- name: Run benchmark stage
129+
shell: bash
130+
run: |
131+
source omni-s2pro/bin/activate
132+
export PYTHONPATH=$PWD
133+
pytest tests/test_model/test_s2pro_tts_ci.py -v -s -x --concurrency 8 --s2pro-stage s2pro-stage-2-stream
134+
env:
135+
HF_ENDPOINT: https://hf-mirror.com
136+
SGLANG_SEEDTTS50_DIR: /github/home/seedtts-50
137+
S2PRO_STAGE_OUTPUT_ROOT: ${{ github.workspace }}/stage-results/stream
138+
SGLANG_OMNI_SERVER_VERSION: v1
139+
140+
- name: Post-stage cleanup
141+
if: always()
142+
uses: ./.github/actions/omni-post-stage
143+
with:
144+
stage-label: S2-Pro stage 2 (streaming)
145+
artifact-search-root: stage-results/stream
146+
artifact-path-globs: |
147+
*/speed_results.json
148+
*/wer_results.json
149+
artifact-upload-name: s2pro-stage-2-results
150+
artifact-upload-path: stage-results/stream
151+
artifact-if-no-files-found: error
152+
153+
stage-3-consistency:
154+
name: stage 3 - consistency
155+
needs: [stage-1-non-streaming, stage-2-streaming]
156+
runs-on: [self-hosted]
157+
timeout-minutes: 5
158+
container:
159+
image: frankleeeee/sglang-omni:dev
160+
options: --gpus all --rm -v /dev/shm:/dev/shm
161+
steps:
162+
- name: Checkout code
163+
uses: actions/checkout@v4
164+
165+
- uses: ./.github/actions/omni-setup
166+
with:
167+
venv-name: omni-s2pro
168+
169+
- name: Download stage 1 artifact
170+
uses: actions/download-artifact@v4
171+
with:
172+
name: s2pro-stage-1-results
173+
path: stage-artifacts/nonstream
174+
175+
- name: Download stage 2 artifact
176+
uses: actions/download-artifact@v4
177+
with:
178+
name: s2pro-stage-2-results
179+
path: stage-artifacts/stream
180+
181+
- name: Run benchmark stage
182+
shell: bash
183+
run: |
184+
source omni-s2pro/bin/activate
185+
export PYTHONPATH=$PWD
186+
pytest tests/test_model/test_s2pro_tts_ci.py -v -s -x --concurrency 8 --s2pro-stage s2pro-stage-3-consistency
187+
env:
188+
HF_ENDPOINT: https://hf-mirror.com
189+
SGLANG_SEEDTTS50_DIR: /github/home/seedtts-50
190+
S2PRO_STAGE1_SPEED_RESULTS_DIR: ${{ github.workspace }}/stage-artifacts/nonstream
191+
S2PRO_STAGE2_SPEED_RESULTS_DIR: ${{ github.workspace }}/stage-artifacts/stream
192+
SGLANG_OMNI_SERVER_VERSION: v1
193+
194+
- name: Post-stage cleanup
195+
if: always()
196+
uses: ./.github/actions/omni-post-stage
197+
with:
198+
stage-label: S2-Pro stage 3 (consistency inputs)
199+
artifact-search-root: stage-artifacts
200+
artifact-path-globs: |
201+
*/speed_results.json
202+
*/wer_results.json

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ htmlcov/
2828

2929
# Logs
3030
*.log
31+
*.txt
3132
output.wav
3233

3334
# OS

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
default_stages: [pre-commit, pre-push, manual]
22

3+
exclude: \.txt$
4+
35
repos:
46
- repo: https://github.com/PyCQA/autoflake
57
rev: v2.3.1

benchmarks/eval/benchmark_omni_mmmu.py

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,20 @@
5151
| Qwen3-Omni | enable_audio=True | 123.13 | 221.52 | 0.004 | 2.2 | 2.1 | PR #316 [H200, **50-sample subset**, c=1, max_tokens=2048] |
5252
| Qwen3-Omni | enable_audio=False | 20.297 | 74.122 | 0.392 | 24.9 | 25.4 | PR #351 [H100, full-set, c=8, max_tokens=2048, text-only server] |
5353
| Qwen3-Omni | enable_audio=True | 19.579 | 23.147 | 0.009 | 3.3 | 3.3 | PR #351 [H100, 50-sample subset, c=1, max_tokens=64, timeout=120s] |
54+
55+
Local v1 Pipeline Result (this workspace, 2026-05-01)
56+
57+
Accuracy (summary)
58+
59+
| Model | Config | accuracy | correct | failed | mc_fallback | Source |
60+
| ---------- | ------------------ | -------- | ------- | ------ | ----------- | ------------------------------------------------------------ |
61+
| Qwen3-Omni | enable_audio=False | 67.11% | 604/900 | 0 | 26 | local v1 sweep [H200, full-set, c=8, max_tokens=2048] |
62+
63+
Speed (speed)
64+
65+
| Model | Config | latency_mean_s | latency_p95_s | throughput_qps | tok_per_s_mean | tok_per_s_agg | Source |
66+
| ---------- | ------------------ | -------------- | ------------- | -------------- | -------------- | ------------- | ------------------------------------------------------------ |
67+
| Qwen3-Omni | enable_audio=False | 6.542 | 21.356 | 1.202 | 76.3 | 76.5 | local v1 sweep [H200, full-set, c=8, max_tokens=2048] |
5468
"""
5569

5670

@@ -180,13 +194,6 @@ async def run_mmmu_eval(config: MMMUEvalConfig) -> dict:
180194
request_results, config.lang, config.asr_device
181195
)
182196

183-
print_mmmu_accuracy_summary(summary, config.model)
184-
print_speed_summary(
185-
speed_metrics, config.model, config.max_concurrency, title="MMMU Speed"
186-
)
187-
if "wer" in results:
188-
print_wer_summary(results["wer"]["summary"], config.model)
189-
190197
if config.output_dir:
191198
save_json_results(results, config.output_dir, "mmmu_results.json")
192199

@@ -216,7 +223,17 @@ def _config_from_args(args: argparse.Namespace) -> MMMUEvalConfig:
216223

217224
async def benchmark(args: argparse.Namespace) -> dict:
218225
config = _config_from_args(args)
219-
return await run_mmmu_eval(config)
226+
results = await run_mmmu_eval(config)
227+
print_mmmu_accuracy_summary(results["summary"], config.model)
228+
print_speed_summary(
229+
results["speed"],
230+
config.model,
231+
config.max_concurrency,
232+
title="MMMU Speed",
233+
)
234+
if "wer" in results:
235+
print_wer_summary(results["wer"]["summary"], config.model)
236+
return results
220237

221238

222239
def main() -> None:

0 commit comments

Comments
 (0)