sgl-project
diff --git a/‎.github/workflows/test-qwen3-omni-ci-v1.yaml‎
Lines changed: 462 additions & 0 deletions b/‎.github/workflows/test-qwen3-omni-ci-v1.yaml‎
Lines changed: 462 additions & 0 deletions
diff --git a/‎.github/workflows/test-qwen3-omni-ci.yaml‎
Lines changed: 20 additions & 18 deletions b/‎.github/workflows/test-qwen3-omni-ci.yaml‎
Lines changed: 20 additions & 18 deletions
diff --git a/‎.github/workflows/test-s2pro-ci-v1.yaml‎
Lines changed: 202 additions & 0 deletions b/‎.github/workflows/test-s2pro-ci-v1.yaml‎
Lines changed: 202 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 0 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎benchmarks/eval/benchmark_omni_mmmu.py‎
Lines changed: 25 additions & 8 deletions b/‎benchmarks/eval/benchmark_omni_mmmu.py‎
Lines changed: 25 additions & 8 deletions
@@ -14,15 +14,17 @@ permissions:
   contents: read
 
 # DAG:
-#   docs ──► stage-1-thinker ──► stage-2-tts
-#                              ├─► stage-3-mmmu
-#                              ├─► stage-4-mmmu-talker
-#                              ├─► stage-5-mmsu
-#                              ├─► stage-6-mmsu-talker
-#                              ├─► stage-7-videomme
-#                              ├─► stage-8-videomme-talker
-#                              ├─► stage-9-videoamme
-#                              └─► stage-10-videoamme-talker
+#   docs ──► stage-1-thinker
+#        ├─► stage-2-tts
+#        ├─► stage-3-mmmu
+#        ├─► stage-4-mmmu-talker
+#        ├─► stage-5-mmsu
+#        ├─► stage-6-mmsu-talker
+#        ├─► stage-7-videomme
+#        ├─► stage-8-videomme-talker
+#        ├─► stage-9-videoamme
+#        └─► stage-10-videoamme-talker
+# All 10 stages run in parallel after docs passes — they are independent.
 
 jobs:
   docs:
@@ -106,7 +108,7 @@ jobs:
 
   stage-2-tts:
     name: stage 2 - TTS speed + WER
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 15
     container:
@@ -143,7 +145,7 @@ jobs:
 
   stage-3-mmmu:
     name: stage 3 - MMMU accuracy + speed
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 20
     container:
@@ -178,7 +180,7 @@ jobs:
 
   stage-4-mmmu-talker:
     name: stage 4 - MMMU Talker
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 15
     container:
@@ -213,7 +215,7 @@ jobs:
 
   stage-5-mmsu:
     name: stage 5 - MMSU accuracy + speed
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 20
     container:
@@ -249,7 +251,7 @@ jobs:
 
   stage-6-mmsu-talker:
     name: stage 6 - MMSU Talker
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 15
     container:
@@ -284,7 +286,7 @@ jobs:
 
   stage-7-videomme:
     name: stage 7 - Video-MME accuracy + speed
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 30
     container:
@@ -319,7 +321,7 @@ jobs:
 
   stage-8-videomme-talker:
     name: stage 8 - Video-MME Talker
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 30
     container:
@@ -354,7 +356,7 @@ jobs:
 
   stage-9-videoamme:
     name: stage 9 - Video-AMME accuracy + speed
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 30
     container:
@@ -389,7 +391,7 @@ jobs:
 
   stage-10-videoamme-talker:
     name: stage 10 - Video-AMME Talker
-    needs: [docs, stage-1-thinker]
+    needs: docs
     runs-on: [self-hosted]
     timeout-minutes: 30
     container:
 
@@ -0,0 +1,202 @@
+name: S2-Pro CI (v1)
+
+on:
+  pull_request:
+    branches: [main]
+    types: [labeled, synchronize, reopened, ready_for_review]
+  workflow_dispatch:
+
+concurrency:
+  group: s2pro-ci-v1-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+# DAG:
+#   docs ──► stage-1-non-streaming ──┐
+#        └─► stage-2-streaming ──────┤
+#                                    └──► stage-3-consistency
+
+jobs:
+  docs:
+    name: docs
+    # CI gate — runs on workflow_dispatch, or on non-draft PRs carrying the `run-ci` label.
+    # Keep in sync with the same gate in test.yaml / test-examples.yaml / test-qwen3-omni-ci.yaml.
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      (!github.event.pull_request.draft &&
+       contains(github.event.pull_request.labels.*.name, 'run-ci'))
+    runs-on: [self-hosted]
+    timeout-minutes: 10
+    container:
+      image: frankleeeee/sglang-omni:dev
+      options: --gpus all --rm -v /dev/shm:/dev/shm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - uses: ./.github/actions/omni-setup
+        with:
+          venv-name: omni-s2pro
+          install-deps: "true"
+
+      - name: Run docs tests
+        shell: bash
+        run: |
+          source omni-s2pro/bin/activate
+          export PYTHONPATH=$PWD
+          pytest tests/docs/s2pro/test_docs_tts_s2pro.py -v -s -x
+        env:
+          HF_ENDPOINT: https://hf-mirror.com
+          SGLANG_OMNI_SERVER_VERSION: v1
+
+      - name: Post-stage cleanup
+        if: always()
+        uses: ./.github/actions/omni-post-stage
+        with:
+          stage-label: S2-Pro docs
+
+      - name: Save cache
+        if: always()
+        shell: bash
+        run: |
+          if omni-s2pro/bin/python -c "import torch" 2>/dev/null; then
+            rm -rf /github/home/omni-s2pro
+            cp -p -r omni-s2pro /github/home/
+          else
+            echo "::warning::Skipping cache save — venv appears corrupted (torch not importable)"
+          fi
+
+  stage-1-non-streaming:
+    name: stage 1 - non-streaming
+    needs: docs
+    runs-on: [self-hosted]
+    timeout-minutes: 5
+    container:
+      image: frankleeeee/sglang-omni:dev
+      options: --gpus all --rm -v /dev/shm:/dev/shm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - uses: ./.github/actions/omni-setup
+        with:
+          venv-name: omni-s2pro
+
+      - name: Run benchmark stage
+        shell: bash
+        run: |
+          source omni-s2pro/bin/activate
+          export PYTHONPATH=$PWD
+          pytest tests/test_model/test_s2pro_tts_ci.py -v -s -x --concurrency 8 --s2pro-stage s2pro-stage-1-nonstream
+        env:
+          HF_ENDPOINT: https://hf-mirror.com
+          SGLANG_SEEDTTS50_DIR: /github/home/seedtts-50
+          S2PRO_STAGE_OUTPUT_ROOT: ${{ github.workspace }}/stage-results/nonstream
+          SGLANG_OMNI_SERVER_VERSION: v1
+
+      - name: Post-stage cleanup
+        if: always()
+        uses: ./.github/actions/omni-post-stage
+        with:
+          stage-label: S2-Pro stage 1 (non-streaming)
+          artifact-search-root: stage-results/nonstream
+          artifact-path-globs: |
+            */speed_results.json
+            */wer_results.json
+          artifact-upload-name: s2pro-stage-1-results
+          artifact-upload-path: stage-results/nonstream
+          artifact-if-no-files-found: error
+
+  stage-2-streaming:
+    name: stage 2 - streaming
+    needs: docs
+    runs-on: [self-hosted]
+    timeout-minutes: 5
+    container:
+      image: frankleeeee/sglang-omni:dev
+      options: --gpus all --rm -v /dev/shm:/dev/shm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - uses: ./.github/actions/omni-setup
+        with:
+          venv-name: omni-s2pro
+
+      - name: Run benchmark stage
+        shell: bash
+        run: |
+          source omni-s2pro/bin/activate
+          export PYTHONPATH=$PWD
+          pytest tests/test_model/test_s2pro_tts_ci.py -v -s -x --concurrency 8 --s2pro-stage s2pro-stage-2-stream
+        env:
+          HF_ENDPOINT: https://hf-mirror.com
+          SGLANG_SEEDTTS50_DIR: /github/home/seedtts-50
+          S2PRO_STAGE_OUTPUT_ROOT: ${{ github.workspace }}/stage-results/stream
+          SGLANG_OMNI_SERVER_VERSION: v1
+
+      - name: Post-stage cleanup
+        if: always()
+        uses: ./.github/actions/omni-post-stage
+        with:
+          stage-label: S2-Pro stage 2 (streaming)
+          artifact-search-root: stage-results/stream
+          artifact-path-globs: |
+            */speed_results.json
+            */wer_results.json
+          artifact-upload-name: s2pro-stage-2-results
+          artifact-upload-path: stage-results/stream
+          artifact-if-no-files-found: error
+
+  stage-3-consistency:
+    name: stage 3 - consistency
+    needs: [stage-1-non-streaming, stage-2-streaming]
+    runs-on: [self-hosted]
+    timeout-minutes: 5
+    container:
+      image: frankleeeee/sglang-omni:dev
+      options: --gpus all --rm -v /dev/shm:/dev/shm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - uses: ./.github/actions/omni-setup
+        with:
+          venv-name: omni-s2pro
+
+      - name: Download stage 1 artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: s2pro-stage-1-results
+          path: stage-artifacts/nonstream
+
+      - name: Download stage 2 artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: s2pro-stage-2-results
+          path: stage-artifacts/stream
+
+      - name: Run benchmark stage
+        shell: bash
+        run: |
+          source omni-s2pro/bin/activate
+          export PYTHONPATH=$PWD
+          pytest tests/test_model/test_s2pro_tts_ci.py -v -s -x --concurrency 8 --s2pro-stage s2pro-stage-3-consistency
+        env:
+          HF_ENDPOINT: https://hf-mirror.com
+          SGLANG_SEEDTTS50_DIR: /github/home/seedtts-50
+          S2PRO_STAGE1_SPEED_RESULTS_DIR: ${{ github.workspace }}/stage-artifacts/nonstream
+          S2PRO_STAGE2_SPEED_RESULTS_DIR: ${{ github.workspace }}/stage-artifacts/stream
+          SGLANG_OMNI_SERVER_VERSION: v1
+
+      - name: Post-stage cleanup
+        if: always()
+        uses: ./.github/actions/omni-post-stage
+        with:
+          stage-label: S2-Pro stage 3 (consistency inputs)
+          artifact-search-root: stage-artifacts
+          artifact-path-globs: |
+            */speed_results.json
+            */wer_results.json
@@ -28,6 +28,7 @@ htmlcov/
 
 # Logs
 *.log
+*.txt
 output.wav
 
 # OS
 
@@ -1,5 +1,7 @@
 default_stages: [pre-commit, pre-push, manual]
 
+exclude: \.txt$
+
 repos:
   - repo: https://github.com/PyCQA/autoflake
     rev: v2.3.1
 
@@ -51,6 +51,20 @@
 | Qwen3-Omni | enable_audio=True  | 123.13         | 221.52        | 0.004          | 2.2            | 2.1           | PR #316 [H200, **50-sample subset**, c=1, max_tokens=2048] |
 | Qwen3-Omni | enable_audio=False | 20.297         | 74.122        | 0.392          | 24.9           | 25.4          | PR #351 [H100, full-set, c=8, max_tokens=2048, text-only server] |
 | Qwen3-Omni | enable_audio=True  | 19.579         | 23.147        | 0.009          | 3.3            | 3.3           | PR #351 [H100, 50-sample subset, c=1, max_tokens=64, timeout=120s] |
+
+Local v1 Pipeline Result (this workspace, 2026-05-01)
+
+Accuracy (summary)
+
+| Model      | Config             | accuracy | correct | failed | mc_fallback | Source                                                       |
+| ---------- | ------------------ | -------- | ------- | ------ | ----------- | ------------------------------------------------------------ |
+| Qwen3-Omni | enable_audio=False | 67.11%   | 604/900 | 0      | 26          | local v1 sweep [H200, full-set, c=8, max_tokens=2048]       |
+
+Speed (speed)
+
+| Model      | Config             | latency_mean_s | latency_p95_s | throughput_qps | tok_per_s_mean | tok_per_s_agg | Source                                                       |
+| ---------- | ------------------ | -------------- | ------------- | -------------- | -------------- | ------------- | ------------------------------------------------------------ |
+| Qwen3-Omni | enable_audio=False | 6.542          | 21.356        | 1.202          | 76.3           | 76.5          | local v1 sweep [H200, full-set, c=8, max_tokens=2048]       |
 """
 
 
@@ -180,13 +194,6 @@ async def run_mmmu_eval(config: MMMUEvalConfig) -> dict:
             request_results, config.lang, config.asr_device
         )
 
-    print_mmmu_accuracy_summary(summary, config.model)
-    print_speed_summary(
-        speed_metrics, config.model, config.max_concurrency, title="MMMU Speed"
-    )
-    if "wer" in results:
-        print_wer_summary(results["wer"]["summary"], config.model)
-
     if config.output_dir:
         save_json_results(results, config.output_dir, "mmmu_results.json")
 
@@ -216,7 +223,17 @@ def _config_from_args(args: argparse.Namespace) -> MMMUEvalConfig:
 
 async def benchmark(args: argparse.Namespace) -> dict:
     config = _config_from_args(args)
-    return await run_mmmu_eval(config)
+    results = await run_mmmu_eval(config)
+    print_mmmu_accuracy_summary(results["summary"], config.model)
+    print_speed_summary(
+        results["speed"],
+        config.model,
+        config.max_concurrency,
+        title="MMMU Speed",
+    )
+    if "wer" in results:
+        print_wer_summary(results["wer"]["summary"], config.model)
+    return results
 
 
 def main() -> None:
-Original file line number
+Diff line change
 # Logs
 *.log
 +*.txt
 output.wav
 # OS