fix(grpc-proto): lazy-load generated re-exports #7448
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow header: runs on pushes and pull requests targeting main, and on | |
| # manual dispatch (workflow_dispatch). | |
| name: PR Test (SMG) | |
| on: | |
| push: | |
| branches: [ main ] | |
| # Pushes that only touch these paths do not trigger a run. | |
| # NOTE(review): under GitHub's filter-pattern rules `*` does not cross `/`, | |
| # so "*.md" presumably covers root-level Markdown files only - confirm intent. | |
| paths-ignore: | |
| - "docs/**" | |
| - "mkdocs.yml" | |
| - "*.md" | |
| pull_request: | |
| branches: [ main ] | |
| types: [opened, synchronize, reopened] | |
| # Same ignore list as the push trigger above. | |
| paths-ignore: | |
| - "docs/**" | |
| - "mkdocs.yml" | |
| - "*.md" | |
| workflow_dispatch: | |
| # Workflow-level token scope: read-only access to repository contents. | |
| permissions: | |
| contents: read | |
| # Per-PR groups cancel superseded runs; pushes to main key on the commit SHA so | |
| # every commit on main gets its own run and is never cancelled by a later push. | |
| concurrency: | |
| group: gateway-tests-${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || github.sha }} | |
| cancel-in-progress: ${{ github.event_name == 'pull_request' }} | |
| # Workflow-wide environment. RUSTC_WRAPPER and SCCACHE_GHA_ENABLED configure | |
| # sccache; GENAI_BENCH_IMAGE is the image pulled by the "Pull genai-bench image" | |
| # steps in the benchmark jobs. | |
| env: | |
| RUSTC_WRAPPER: sccache | |
| SCCACHE_GHA_ENABLED: "true" | |
| GENAI_BENCH_IMAGE: ghcr.io/moirai-internal/genai-bench:0.0.4 | |
| jobs: | |
| # Runs the repository's pre-commit hooks against every file using Python 3.13. | |
| pre-commit: | |
| runs-on: k8s-runner-cpu | |
| permissions: | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.13" | |
| - name: Install pre-commit | |
| run: pip install pre-commit | |
| - name: Run pre-commit checks | |
| env: | |
| # Hook ids that pre-commit skips in this job. rustfmt and clippy are run | |
| # by the unit-tests job ("Run fmt" / "Run lint"); the remaining ids are | |
| # presumably local-only commit/branch policy hooks - confirm. | |
| SKIP: rustfmt,clippy,no-commit-to-branch,branch-name-check,dco-check,no-ai-co-author | |
| run: pre-commit run --all-files --show-diff-on-failure | |
| # Lints and type-checks the Python sources: ruff check and ruff format --check | |
| # over e2e_test/, bindings/python/ and scripts/, then mypy over e2e_test/ and | |
| # bindings/python/ using mypy.ini. | |
| python-lint: | |
| runs-on: k8s-runner-cpu | |
| permissions: | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.13" | |
| - name: Install linting tools | |
| run: pip install ruff mypy | |
| - name: Ruff check | |
| run: ruff check e2e_test/ bindings/python/ scripts/ | |
| - name: Ruff format check | |
| run: ruff format --check e2e_test/ bindings/python/ scripts/ | |
| # mypy is invoked once per package, both times with the shared mypy.ini. | |
| - name: Mypy (e2e_test) | |
| run: mypy e2e_test/ --config-file mypy.ini | |
| - name: Mypy (bindings/python) | |
| run: mypy bindings/python/ --config-file mypy.ini | |
| # Builds the smg_grpc_proto Python package from the .proto sources, installs | |
| # the resulting wheel, and asserts the TensorData message's field layout. | |
| grpc-proto-build-check: | |
| runs-on: k8s-runner-cpu | |
| permissions: | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.12" | |
| - name: Copy protos and build | |
| run: | | |
| # Replace the checked-in `proto` entry with a real directory holding copies | |
| # of the .proto files. `rm -f` has no -r, so the entry is presumably a | |
| # symlink or file in the checkout - confirm. | |
| rm -f crates/grpc_client/python/smg_grpc_proto/proto | |
| mkdir -p crates/grpc_client/python/smg_grpc_proto/proto | |
| cp crates/grpc_client/proto/*.proto crates/grpc_client/python/smg_grpc_proto/proto/ | |
| pip install build grpcio-tools | |
| # Clear old build output so the wheel glob in the next step matches only | |
| # the wheel built here. | |
| rm -rf crates/grpc_client/python/dist/ | |
| cd crates/grpc_client/python && python -m build | |
| - name: Test import | |
| run: | | |
| pip install crates/grpc_client/python/dist/*.whl | |
| # Import both generated modules and pin TensorData: no "data" field, and | |
| # shape/dtype/inline/shm/remote carry field numbers 1 through 5. | |
| python - <<'PY' | |
| from smg_grpc_proto import sglang_scheduler_pb2, tokenspeed_scheduler_pb2 | |
| fields = tokenspeed_scheduler_pb2.TensorData.DESCRIPTOR.fields_by_name | |
| assert "data" not in fields | |
| assert fields["shape"].number == 1 | |
| assert fields["dtype"].number == 2 | |
| assert fields["inline"].number == 3 | |
| assert fields["shm"].number == 4 | |
| assert fields["remote"].number == 5 | |
| print("OK") | |
| PY | |
| # Produces the shared build artifacts consumed by downstream jobs: the Python | |
| # wheel (smg-wheel), the Go FFI library (go-ffi-library), generated Python | |
| # client types (python-client-types) and WASM test fixtures | |
| # (wasm-test-fixtures). Finishes with a smoke test of the built wheel. | |
| build-wheel: | |
| # CPU-only Rust/wheel/Go-FFI/WASM compile — no GPU needed. Runs on the CPU | |
| # runner pool (which also tolerates the nvidia.com/gpu taint, so it can use | |
| # the abundant stranded CPU on GPU nodes) instead of competing for scarce | |
| # GPU runners. | |
| runs-on: k8s-runner-cpu | |
| permissions: | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v6 | |
| # No wheel-output cache here. It was keyed on a hash of every crate's | |
| # source, so it missed on essentially every code PR (all-or-nothing), and | |
| # on a miss it *masked* the incremental compilation cache by skipping the | |
| # build entirely — which also meant rust-cache rarely got populated. The | |
| # build now always runs and leans on rust-cache + sccache (set up in | |
| # ./.github/actions/setup-rust), which degrade gracefully: with the | |
| # dependency graph cached, only changed crates recompile. | |
| - name: Setup Rust | |
| uses: ./.github/actions/setup-rust | |
| - name: Build Python wheel and Go FFI library | |
| run: | | |
| # Remove stale wheels so later dist/*.whl globs match only this build. | |
| rm -rf bindings/python/dist/ | |
| bash scripts/ci_setup_python_venv.sh | |
| bash scripts/ci_build_wheel.sh | |
| - name: Generate Python client types | |
| run: | | |
| source "$HOME/.cargo/env" | |
| mkdir -p clients/openapi | |
| # Emit the OpenAPI document, then generate pydantic v2 models from it with | |
| # a pinned datamodel-code-generator release. | |
| cargo run -p openapi-gen -- clients/openapi/smg-openapi.yaml | |
| pip install 'datamodel-code-generator==0.54.0' | |
| datamodel-codegen \ | |
| --input clients/openapi/smg-openapi.yaml \ | |
| --input-file-type openapi \ | |
| --output clients/python/smg_client/types/_generated.py \ | |
| --output-model-type pydantic_v2.BaseModel \ | |
| --use-annotated \ | |
| --field-constraints \ | |
| --target-python-version 3.10 \ | |
| --collapse-root-models \ | |
| --use-standard-collections \ | |
| --use-union-operator | |
| # Post-process the generated file: rewrite every `class X(Enum):` header | |
| # to `class X(str, Enum):`. | |
| sed -i 's/class \(.*\)(Enum):/class \1(str, Enum):/' clients/python/smg_client/types/_generated.py | |
| - name: Build WASM test fixtures | |
| run: | | |
| source "$HOME/.cargo/env" | |
| bash crates/wasm/tests/fixtures/build_fixtures.sh | |
| # All four artifacts are kept for one day only. | |
| - name: Upload wheel artifact | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: smg-wheel | |
| path: bindings/python/dist/*.whl | |
| retention-days: 1 | |
| - name: Upload Go FFI library artifact | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: go-ffi-library | |
| path: bindings/golang/target/release/libsmg_go.* | |
| retention-days: 1 | |
| - name: Upload Python client types | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: python-client-types | |
| path: clients/python/smg_client/types/_generated.py | |
| retention-days: 1 | |
| - name: Upload WASM test fixtures | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: wasm-test-fixtures | |
| path: crates/wasm/tests/fixtures/*.wasm | |
| retention-days: 1 | |
| # A missing fixture set is tolerated here; the jobs that download this | |
| # artifact mark that download continue-on-error. | |
| if-no-files-found: ignore | |
| - name: Show sccache stats | |
| if: always() | |
| run: sccache --show-stats | |
| # Smoke test: install the freshly built wheel under Python 3.12 and import | |
| # the package, its Rust extension, and the launch_router entry point. | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.12" | |
| - name: Test wheel install | |
| run: | | |
| pip install bindings/python/dist/*.whl | |
| python3 -c "import smg; print('Python package: OK')" | |
| python3 -c "from smg.smg_rs import Router; print('Rust extension: OK')" | |
| python3 -m smg.launch_router --help > /dev/null && echo "Entry point: OK" | |
| # Installs the wheel built by build-wheel and runs the Python binding unit | |
| # tests (with a coverage gate) followed by the grpc_servicer unit tests. | |
| python-unit-tests: | |
| needs: build-wheel | |
| runs-on: k8s-runner-cpu | |
| permissions: | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.13" | |
| - name: Download wheel artifact | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: smg-wheel | |
| path: dist/ | |
| - name: Install wheel | |
| run: pip install dist/*.whl | |
| - name: Run Python unit tests | |
| run: | | |
| cd bindings/python | |
| python3 -m pip install pytest pytest-cov pytest-xdist | |
| # The run fails if coverage of the smg package drops below 80%. | |
| pytest -q tests --cov=smg --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80 | |
| - name: Run grpc_servicer unit tests | |
| run: | | |
| # Same proto staging as the grpc-proto-build-check job: swap the checked-in | |
| # `proto` entry for a directory of copied .proto files before installing. | |
| rm -f crates/grpc_client/python/smg_grpc_proto/proto | |
| mkdir -p crates/grpc_client/python/smg_grpc_proto/proto | |
| cp crates/grpc_client/proto/*.proto crates/grpc_client/python/smg_grpc_proto/proto/ | |
| pip install ./crates/grpc_client/python | |
| pytest -q grpc_servicer/tests | |
| # Rust checks: default-feature cargo check, clippy, nightly rustfmt and | |
| # cargo test. On pull requests the job runs only when detect-changes reports | |
| # rust-ci changes; for other events it runs whenever detect-changes succeeds. | |
| unit-tests: | |
| needs: [detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.detect-changes.result == 'success' | |
| && ( | |
| github.event_name != 'pull_request' | |
| || needs.detect-changes.outputs.rust-ci == 'true' | |
| ) | |
| runs-on: k8s-runner-cpu | |
| permissions: | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.13" | |
| - name: Setup Rust | |
| uses: ./.github/actions/setup-rust | |
| # Runs before the OpenCV install step below, so these checks exercise the | |
| # default build on a machine without the OpenCV build dependencies. | |
| - name: Verify default multimodal build does not require OpenCV | |
| run: | | |
| source "$HOME/.cargo/env" | |
| cargo check -p llm-multimodal | |
| cargo check --manifest-path bindings/python/Cargo.toml | |
| cargo check --manifest-path bindings/golang/Cargo.toml | |
| - name: Install OpenCV build dependencies | |
| run: AUTO_INSTALL=1 bash scripts/install_opencv.sh | |
| - name: Build WASM test fixtures | |
| run: | | |
| source "$HOME/.cargo/env" | |
| bash crates/wasm/tests/fixtures/build_fixtures.sh | |
| - name: Run lint | |
| run: | | |
| source "$HOME/.cargo/env" | |
| rustup component add clippy | |
| cargo clippy --all-targets --all-features -- -D warnings | |
| - name: Run fmt | |
| run: | | |
| source "$HOME/.cargo/env" | |
| # Install the nightly toolchain before adding a component to it: rustup | |
| # cannot add rustfmt to a toolchain that is not installed yet. | |
| rustup toolchain install nightly --profile minimal | |
| rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt | |
| cargo +nightly fmt -- --check | |
| - name: Generate vision golden fixtures | |
| run: | | |
| # CPU-only PyTorch wheels; the generator script runs before the Rust tests. | |
| python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu | |
| python -m pip install transformers pillow numpy scipy | |
| python crates/multimodal/scripts/generate_vision_golden.py | |
| - name: Run Rust tests | |
| timeout-minutes: 30 | |
| run: | | |
| source "$HOME/.cargo/env" | |
| cargo test | |
| - name: Show sccache stats | |
| if: always() | |
| run: sccache --show-stats | |
| # --- Benchmarks (standalone) --- | |
| # Runs the pytest benchmark suite under e2e_test/benchmarks on a 4-GPU H100 | |
| # runner against the wheel built by build-wheel, then uploads the results. | |
| benchmarks: | |
| needs: build-wheel | |
| runs-on: 4-gpu-h100 | |
| timeout-minutes: 36 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v6 | |
| - name: Setup SGLang backend | |
| uses: ./.github/actions/setup-sglang | |
| - name: Download wheel artifact | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: smg-wheel | |
| path: wheel/ | |
| # The fixtures artifact may be absent (build-wheel uploads it with | |
| # if-no-files-found: ignore), so a failed download does not fail the job. | |
| - name: Download WASM test fixtures | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: wasm-test-fixtures | |
| path: crates/wasm/tests/fixtures/ | |
| continue-on-error: true | |
| - name: Download Python client types | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: python-client-types | |
| path: clients/python/smg_client/types/ | |
| - name: Install wheel and test dependencies | |
| run: | | |
| # Drop any previously installed smg first; the uninstall may find nothing. | |
| pip uninstall -y smg || true | |
| pip install wheel/*.whl | |
| bash scripts/ci_install_e2e_deps.sh | |
| - name: Pull genai-bench image | |
| run: docker pull ${{ env.GENAI_BENCH_IMAGE }} | |
| - name: Run benchmarks | |
| env: | |
| ROUTER_LOCAL_MODEL_PATH: /models | |
| E2E_LOG_DIR: benchmark-logs | |
| GENAI_BENCH_TEST_TIMEOUT: "480" | |
| run: | | |
| mkdir -p benchmark-logs | |
| bash scripts/ci_killall_sglang.sh "nuke_gpus" | |
| # Two benchmark files are excluded here; test_go_bindings_perf.py is run by | |
| # the go-bindings-benchmark job instead. | |
| SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 pytest e2e_test/benchmarks \ | |
| --ignore=e2e_test/benchmarks/test_go_bindings_perf.py \ | |
| --ignore=e2e_test/benchmarks/test_nightly_perf.py \ | |
| -s -vv | |
| # Results are uploaded even when the benchmark step fails; the | |
| # summarize-benchmarks job downloads this artifact. | |
| - name: Upload benchmark results | |
| if: always() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: genai-bench-results-all-policies | |
| path: benchmark_**/ | |
| # Worker logs are collected and uploaded only when the job failed or was cancelled. | |
| - name: Worker failure diagnostics | |
| if: failure() || cancelled() | |
| run: bash scripts/ci_dump_worker_logs.sh benchmark-logs benchmark-worker-logs | |
| - name: Upload worker logs | |
| if: failure() || cancelled() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: benchmark-worker-logs | |
| path: benchmark-logs/ | |
| if-no-files-found: ignore | |
| retention-days: 7 | |
| # Classifies the changed paths with dorny/paths-filter and exposes one output | |
| # per filter group. Downstream jobs read these outputs in their `if:` | |
| # conditions to decide whether to run on pull requests. | |
| detect-changes: | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| pull-requests: read | |
| outputs: | |
| common: ${{ steps.filter.outputs.common }} | |
| chat-completions: ${{ steps.filter.outputs.chat-completions }} | |
| completions: ${{ steps.filter.outputs.completions }} | |
| agentic: ${{ steps.filter.outputs.agentic }} | |
| embeddings: ${{ steps.filter.outputs.embeddings }} | |
| go-bindings: ${{ steps.filter.outputs.go-bindings }} | |
| rust-ci: ${{ steps.filter.outputs.rust-ci }} | |
| steps: | |
| - uses: actions/checkout@v6 | |
| - uses: dorny/paths-filter@v4 | |
| id: filter | |
| with: | |
| filters: | | |
| # common: every path-gated E2E job in this workflow accepts this group | |
| # as a trigger, alongside its own API-specific group. | |
| # NOTE(review): pr-test-rust.yml is presumably this workflow's own file | |
| # name - confirm. | |
| common: | |
| - 'model_gateway/**' | |
| - 'crates/protocols/**' | |
| - 'bindings/**' | |
| - 'e2e_test/conftest.py' | |
| - 'e2e_test/infra/**' | |
| - 'e2e_test/fixtures/**' | |
| - 'Cargo.lock' | |
| - '.github/actions/**' | |
| - '.github/workflows/pr-test-rust.yml' | |
| - '.github/workflows/e2e-gpu-job.yml' | |
| - 'scripts/ci_setup_python_venv.sh' | |
| - 'scripts/ci_install_sglang.sh' | |
| - 'scripts/ci_install_vllm.sh' | |
| - 'scripts/ci_install_tokenspeed.sh' | |
| - 'scripts/ci_install_e2e_deps.sh' | |
| - 'scripts/ci_killall_sglang.sh' | |
| - 'scripts/ci_build_wheel.sh' | |
| - 'crates/tokenizer/**' | |
| - 'crates/tool_parser/**' | |
| # chat-completions: gates e2e-1gpu-chat, e2e-1gpu-gateway and e2e-2gpu-pd. | |
| chat-completions: | |
| - 'crates/reasoning_parser/**' | |
| - 'crates/multimodal/**' | |
| - 'crates/grpc_client/**' | |
| - 'grpc_servicer/**' | |
| - 'e2e_test/chat_completions/**' | |
| - 'e2e_test/router/**' | |
| - 'scripts/ci_install_vllm.sh' | |
| - 'scripts/ci_install_trtllm.sh' | |
| - 'scripts/ci_install_tokenspeed.sh' | |
| # agentic: gates e2e-1gpu-responses and e2e-vendor. | |
| agentic: | |
| - 'crates/mcp/**' | |
| - 'crates/data_connector/**' | |
| - 'e2e_test/responses/**' | |
| - 'e2e_test/messages/**' | |
| - 'scripts/ci_agentic_svc_deps.sh' | |
| - 'scripts/oracle_flyway/**' | |
| # completions: gates e2e-1gpu-completions. | |
| completions: | |
| - 'crates/grpc_client/**' | |
| - 'grpc_servicer/**' | |
| - 'e2e_test/completions/**' | |
| # embeddings: gates e2e-1gpu-embeddings. | |
| embeddings: | |
| - 'e2e_test/embeddings/**' | |
| # go-bindings: gates go-bindings-e2e. | |
| go-bindings: | |
| - 'e2e_test/bindings_go/**' | |
| # rust-ci: gates the unit-tests job on pull requests. | |
| rust-ci: | |
| - 'Cargo.toml' | |
| - 'Cargo.lock' | |
| - 'rustfmt.toml' | |
| - 'clippy.toml' | |
| - 'crates/**' | |
| - 'test_support/**' | |
| - 'model_gateway/**' | |
| - 'bindings/python/Cargo.toml' | |
| - 'bindings/python/build.rs' | |
| - 'bindings/python/src/**' | |
| - 'bindings/golang/Cargo.toml' | |
| - 'bindings/golang/src/**' | |
| - 'clients/rust/**' | |
| - 'clients/openapi-gen/**' | |
| - '.cargo/**' | |
| - '.github/actions/setup-rust/**' | |
| - 'scripts/ci_install_rust.sh' | |
| - '.github/workflows/pr-test-rust.yml' | |
| - 'examples/wasm/wasm-guest-storage-hook/**' | |
| - 'examples/wasm/wasm-guest-storage-hook-passthrough/**' | |
| # --- GPU E2E: organized by GPU tier + API, engine as matrix axis --- | |
| # === 1 GPU === | |
| # 1-GPU chat-completions E2E, one matrix entry per engine, delegated to the | |
| # reusable e2e-gpu-job.yml workflow. Requires a successful build-wheel; on | |
| # pull requests it additionally requires a `common` or `chat-completions` | |
| # change reported by detect-changes. | |
| e2e-1gpu-chat: | |
| name: e2e-1gpu-chat (${{ matrix.engine }}) | |
| needs: [build-wheel, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.build-wheel.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.chat-completions == 'true'))) | |
| # Now also runs the previously-2-GPU chat tests (gpt-oss-20b, | |
| # Qwen2.5-14B) since they're tp=1. | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - engine: sglang | |
| timeout: 36 | |
| test_timeout: 28 | |
| - engine: vllm | |
| timeout: 24 | |
| test_timeout: 18 | |
| - engine: trtllm | |
| timeout: 32 | |
| test_timeout: 18 | |
| # tokenspeed builds from source (~30m cold), so keep the job | |
| # timeout generous even though the test step is short. | |
| # Admin-ops e2e (flush/profile) piggybacks here because this is | |
| # the only lane with tokenspeed installed — e2e-1gpu-gateway | |
| # installs sglang/vllm only. The engine marker filter keeps the | |
| # rest of e2e_test/router out of this job. | |
| - engine: tokenspeed | |
| timeout: 50 | |
| test_timeout: 18 | |
| test_dirs: e2e_test/chat_completions e2e_test/router/test_admin_ops.py | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: ${{ matrix.engine }} | |
| gpu_tier: "1" | |
| runner: 1-gpu-h100 | |
| timeout: ${{ matrix.timeout }} | |
| test_timeout: ${{ matrix.test_timeout }} | |
| # Only the tokenspeed entry sets test_dirs; the others use the default here. | |
| test_dirs: ${{ matrix.test_dirs || 'e2e_test/chat_completions' }} | |
| secrets: inherit | |
| # 1-GPU completions E2E for sglang and vllm via e2e-gpu-job.yml. Requires a | |
| # successful build-wheel; on pull requests it also requires a `common` or | |
| # `completions` change reported by detect-changes. | |
| e2e-1gpu-completions: | |
| name: e2e-1gpu-completions (${{ matrix.engine }}) | |
| needs: [build-wheel, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.build-wheel.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.completions == 'true'))) | |
| strategy: | |
| # One engine failing does not cancel the other matrix entry. | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - engine: sglang | |
| timeout: 20 | |
| - engine: vllm | |
| timeout: 20 | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: ${{ matrix.engine }} | |
| gpu_tier: "1" | |
| runner: 1-gpu-h100 | |
| timeout: ${{ matrix.timeout }} | |
| test_dirs: e2e_test/completions | |
| secrets: inherit | |
| # 1-GPU embeddings E2E for sglang and vllm via e2e-gpu-job.yml. Requires a | |
| # successful build-wheel; on pull requests it also requires a `common` or | |
| # `embeddings` change reported by detect-changes. | |
| e2e-1gpu-embeddings: | |
| name: e2e-1gpu-embeddings (${{ matrix.engine }}) | |
| needs: [build-wheel, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.build-wheel.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.embeddings == 'true'))) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - engine: sglang | |
| timeout: 20 | |
| - engine: vllm | |
| timeout: 20 | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: ${{ matrix.engine }} | |
| gpu_tier: "1" | |
| runner: 1-gpu-h100 | |
| timeout: ${{ matrix.timeout }} | |
| test_dirs: e2e_test/embeddings | |
| # Extra dependency passed to the reusable workflow for this suite only. | |
| extra_deps: "sentence-transformers" | |
| secrets: inherit | |
| # 1-GPU router/gateway E2E for sglang and vllm via e2e-gpu-job.yml. Uses the | |
| # same gate as e2e-1gpu-chat (`common` or `chat-completions` on pull | |
| # requests). e2e-2gpu-pd and e2e-4gpu-gateway depend on this job. | |
| e2e-1gpu-gateway: | |
| name: e2e-1gpu-gateway (${{ matrix.engine }}) | |
| needs: [build-wheel, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.build-wheel.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.chat-completions == 'true'))) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - engine: sglang | |
| timeout: 20 | |
| - engine: vllm | |
| timeout: 20 | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: ${{ matrix.engine }} | |
| gpu_tier: "1" | |
| runner: 1-gpu-h100 | |
| timeout: ${{ matrix.timeout }} | |
| test_dirs: e2e_test/router | |
| # test_pd_mmlu.py is excluded from this lane. | |
| test_filter: "--ignore=e2e_test/router/test_pd_mmlu.py" | |
| secrets: inherit | |
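| # === 2 GPU === | |
| # e2e-2gpu-chat was retired: all chat_completions tests are tp=1 now | |
| # and run under e2e-1gpu-chat above. Note that the next job, | |
| # e2e-1gpu-responses, still belongs to the 1-GPU tier; the 2-GPU job | |
| # (e2e-2gpu-pd) follows it. | |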
| # 1-GPU responses E2E on sglang via e2e-gpu-job.yml, with the agentic service | |
| # dependencies enabled. Requires a successful build-wheel; on pull requests it | |
| # also requires a `common` or `agentic` change reported by detect-changes. | |
| e2e-1gpu-responses: | |
| needs: [build-wheel, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.build-wheel.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.agentic == 'true'))) | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: sglang | |
| gpu_tier: "1" | |
| runner: 1-gpu-h100 | |
| timeout: 28 | |
| test_timeout: 20 | |
| test_dirs: e2e_test/responses | |
| setup_agentic_deps: true | |
| secrets: inherit | |
| # 2-GPU router E2E via e2e-gpu-job.yml. Runs only after e2e-1gpu-gateway | |
| # succeeds; on pull requests it also requires a `common` or | |
| # `chat-completions` change reported by detect-changes. | |
| e2e-2gpu-pd: | |
| # The display name appends "-<kv_backend>" only for matrix entries that set one. | |
| name: e2e-2gpu-pd (${{ matrix.engine }}${{ matrix.kv_backend && format('-{0}', matrix.kv_backend) || '' }}) | |
| needs: [e2e-1gpu-gateway, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.e2e-1gpu-gateway.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.chat-completions == 'true'))) | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - engine: sglang | |
| timeout: 30 | |
| - engine: vllm | |
| timeout: 30 | |
| # Second vllm entry: same engine with the mooncake KV backend. | |
| - engine: vllm | |
| kv_backend: mooncake | |
| timeout: 30 | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: ${{ matrix.engine }} | |
| gpu_tier: "2" | |
| runner: 2-gpu-h100 | |
| timeout: ${{ matrix.timeout }} | |
| test_dirs: e2e_test/router | |
| # Entries without kv_backend pass 'nixl'. | |
| vllm_kv_backend: ${{ matrix.kv_backend || 'nixl' }} | |
| secrets: inherit | |
| # === 4 GPU === | |
| # 4-GPU chat-completions E2E for sglang, vllm and trtllm via e2e-gpu-job.yml. | |
| # There is no `if:` here, so the default status check applies: the job runs | |
| # only when e2e-1gpu-chat succeeded and is skipped when that job is skipped. | |
| e2e-4gpu-chat: | |
| name: e2e-4gpu-chat (${{ matrix.engine }}) | |
| needs: [e2e-1gpu-chat] | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - engine: sglang | |
| timeout: 30 | |
| - engine: vllm | |
| timeout: 30 | |
| - engine: trtllm | |
| timeout: 45 | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: ${{ matrix.engine }} | |
| gpu_tier: "4" | |
| runner: 4-gpu-h100 | |
| timeout: ${{ matrix.timeout }} | |
| test_dirs: e2e_test/chat_completions | |
| secrets: inherit | |
| # 4-GPU router E2E on sglang via e2e-gpu-job.yml, restricted by a pytest -k | |
| # filter to TestIGWMixedWorkerClassification. With no `if:`, it runs only | |
| # when e2e-1gpu-gateway succeeded. | |
| e2e-4gpu-gateway: | |
| name: e2e-4gpu-gateway | |
| needs: [e2e-1gpu-gateway] | |
| uses: ./.github/workflows/e2e-gpu-job.yml | |
| with: | |
| engine: sglang | |
| gpu_tier: "4" | |
| runner: 4-gpu-h100 | |
| timeout: 25 | |
| test_dirs: e2e_test/router | |
| test_filter: "-k TestIGWMixedWorkerClassification" | |
| secrets: inherit | |
| # --- Vendor E2E: CPU-only cloud backend tests --- | |
| # Vendor-backend E2E on the CPU runner, one matrix entry per vendor/API pair. | |
| # Requires a successful build-wheel; on pull requests it also requires a | |
| # `common` or `agentic` change reported by detect-changes. | |
| e2e-vendor: | |
| name: ${{ matrix.name }} | |
| needs: [build-wheel, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.build-wheel.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.agentic == 'true'))) | |
| permissions: | |
| contents: read | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # Each entry names the vendor, the test directory, the job timeout in | |
| # minutes and, optionally, setup_agentic_deps to enable the Oracle and | |
| # Brave MCP steps below. openai-realtime is the only entry without it. | |
| include: | |
| - name: anthropic-messages | |
| vendor: anthropic | |
| test_path: e2e_test/messages | |
| timeout: 20 | |
| setup_agentic_deps: true | |
| - name: openai-responses | |
| vendor: openai | |
| test_path: e2e_test/responses | |
| timeout: 30 | |
| setup_agentic_deps: true | |
| - name: openai-realtime | |
| vendor: openai | |
| test_path: e2e_test/realtime | |
| timeout: 20 | |
| - name: xai-responses | |
| vendor: xai | |
| test_path: e2e_test/responses | |
| timeout: 15 | |
| setup_agentic_deps: true | |
| runs-on: k8s-runner-cpu | |
| timeout-minutes: ${{ matrix.timeout }} | |
| env: | |
| E2E_VENDOR: ${{ matrix.vendor }} | |
| E2E_GPU_TIER: "0" | |
| SHOW_ROUTER_LOGS: "1" | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v6 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.13" | |
| - name: Check shared services | |
| if: matrix.setup_agentic_deps | |
| run: bash scripts/ci_agentic_svc_deps.sh check --oracle oracle-db --brave brave-search-mcp | |
| - name: Download wheel artifact | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: smg-wheel | |
| path: wheel/ | |
| # The fixtures artifact may be absent, so a failed download is tolerated. | |
| - name: Download WASM test fixtures | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: wasm-test-fixtures | |
| path: crates/wasm/tests/fixtures/ | |
| continue-on-error: true | |
| - name: Download Python client types | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: python-client-types | |
| path: clients/python/smg_client/types/ | |
| - name: Install wheel and test dependencies | |
| run: | | |
| pip uninstall -y smg || true | |
| pip install wheel/*.whl | |
| bash scripts/ci_install_e2e_deps.sh | |
| - name: Setup Oracle | |
| if: matrix.setup_agentic_deps | |
| run: | | |
| bash scripts/ci_agentic_svc_deps.sh setup-oracle-client | |
| bash scripts/ci_agentic_svc_deps.sh create-oracle-user oracle-db | |
| bash scripts/ci_agentic_svc_deps.sh create-oracle-flyway-user oracle-db | |
| - name: Run E2E tests | |
| env: | |
| # Empty string for entries that do not set setup_agentic_deps. | |
| BRAVE_MCP_HOST: ${{ matrix.setup_agentic_deps && 'brave-search-mcp' || '' }} | |
| run: | | |
| # Tests marked "external" are deselected; failing tests are retried up to | |
| # twice with a 5-second delay. | |
| ROUTER_LOCAL_MODEL_PATH="/models" pytest ${{ matrix.test_path }} \ | |
| -m "not external" \ | |
| --reruns 2 --reruns-delay 5 \ | |
| -s -vv | |
| # Runs even after a failure so the users created in "Setup Oracle" are removed. | |
| - name: Cleanup Oracle test users | |
| if: always() && matrix.setup_agentic_deps | |
| run: | | |
| bash scripts/ci_agentic_svc_deps.sh cleanup-oracle-flyway-user oracle-db | |
| bash scripts/ci_agentic_svc_deps.sh cleanup-oracle-user oracle-db | |
| # Runs `go test` for bindings/golang against the Go FFI library artifact | |
| # produced by build-wheel. | |
| go-unit-tests: | |
| name: go-unit-tests | |
| needs: build-wheel | |
| runs-on: k8s-runner-cpu | |
| timeout-minutes: 15 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v6 | |
| - name: Setup Go | |
| uses: actions/setup-go@v6 | |
| with: | |
| go-version: '1.24' | |
| cache: true | |
| cache-dependency-path: bindings/golang/go.sum | |
| - name: Install build tools | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential | |
| - name: Download Go FFI library | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: go-ffi-library | |
| path: bindings/golang/target/release/ | |
| # Fails the job early if the downloaded artifact contains no libsmg_go.* file. | |
| - name: Verify Go FFI library | |
| run: ls -la bindings/golang/target/release/libsmg_go.* | |
| - name: Run Go unit tests | |
| run: | | |
| cd bindings/golang | |
| # cgo is enabled, and both the link-time and run-time library search paths | |
| # point at the downloaded FFI library directory. | |
| export CGO_ENABLED=1 | |
| export CGO_LDFLAGS="-L$(pwd)/target/release" | |
| export LD_LIBRARY_PATH="$(pwd)/target/release:$LD_LIBRARY_PATH" | |
| go test -v ./... | |
| # Go bindings E2E on a 1-GPU H100 runner: runs e2e_test/bindings_go against | |
| # the wheel and Go FFI library from build-wheel. On pull requests it also | |
| # requires a `common` or `go-bindings` change reported by detect-changes. | |
| go-bindings-e2e: | |
| name: go-bindings-e2e | |
| needs: [build-wheel, detect-changes] | |
| if: >- | |
| always() | |
| && !cancelled() | |
| && needs.build-wheel.result == 'success' | |
| && (github.event_name != 'pull_request' | |
| || (needs.detect-changes.result == 'success' | |
| && (needs.detect-changes.outputs.common == 'true' | |
| || needs.detect-changes.outputs.go-bindings == 'true'))) | |
| runs-on: 1-gpu-h100 | |
| timeout-minutes: 20 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v6 | |
| - name: Setup SGLang backend | |
| uses: ./.github/actions/setup-sglang | |
| - name: Setup Go | |
| uses: actions/setup-go@v6 | |
| with: | |
| go-version: '1.24' | |
| cache: true | |
| cache-dependency-path: bindings/golang/go.sum | |
| - name: Download Go FFI library | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: go-ffi-library | |
| path: bindings/golang/target/release/ | |
| - name: Verify Go FFI library | |
| run: ls -la bindings/golang/target/release/libsmg_go.* | |
| - name: Download wheel artifact | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: smg-wheel | |
| path: wheel/ | |
| - name: Download Python client types | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: python-client-types | |
| path: clients/python/smg_client/types/ | |
| - name: Install wheel and test dependencies | |
| run: | | |
| pip uninstall -y smg || true | |
| pip install wheel/*.whl | |
| bash scripts/ci_install_e2e_deps.sh | |
| - name: Run Go OAI server E2E tests | |
| run: | | |
| bash scripts/ci_killall_sglang.sh "nuke_gpus" | |
| # Point the linker and the dynamic loader at the downloaded FFI library. | |
| export CGO_LDFLAGS="-L$(pwd)/bindings/golang/target/release" | |
| export LD_LIBRARY_PATH="$(pwd)/bindings/golang/target/release:$LD_LIBRARY_PATH" | |
| # Failing tests are retried up to twice with a 5-second delay. | |
| SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/models" \ | |
| E2E_LOG_DIR=e2e-logs \ | |
| pytest --reruns 2 --reruns-delay 5 e2e_test/bindings_go -s -vv | |
| # Worker logs are collected and uploaded only when the job failed or was cancelled. | |
| - name: Worker failure diagnostics | |
| if: failure() || cancelled() | |
| run: bash scripts/ci_dump_worker_logs.sh e2e-logs "e2e-worker-logs-go-bindings" | |
| - name: Upload worker logs | |
| if: failure() || cancelled() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: e2e-worker-logs-go-bindings | |
| path: e2e-logs/ | |
| retention-days: 7 | |
| if-no-files-found: ignore | |
| # Go bindings performance benchmark (test_go_bindings_perf.py). Currently | |
| # disabled through `if: false`, and not listed in the finish job's needs. | |
| go-bindings-benchmark: | |
| name: go-bindings-benchmark | |
| needs: build-wheel | |
| if: false # Disabled | |
| runs-on: k8s-runner-gpu | |
| timeout-minutes: 32 | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v6 | |
| - name: Setup SGLang backend | |
| uses: ./.github/actions/setup-sglang | |
| - name: Setup Go | |
| uses: actions/setup-go@v6 | |
| with: | |
| go-version: '1.24' | |
| cache: true | |
| cache-dependency-path: bindings/golang/go.sum | |
| - name: Download Go FFI library | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: go-ffi-library | |
| path: bindings/golang/target/release/ | |
| - name: Verify Go FFI library | |
| run: ls -la bindings/golang/target/release/libsmg_go.* | |
| - name: Download wheel artifact | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: smg-wheel | |
| path: wheel/ | |
| - name: Install wheel | |
| run: | | |
| pip uninstall -y smg || true | |
| pip install wheel/*.whl | |
| # Unlike the other E2E jobs, this one installs its test dependencies inline | |
| # rather than through scripts/ci_install_e2e_deps.sh. | |
| - name: Install test dependencies | |
| run: | | |
| python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy | |
| - name: Pull genai-bench image | |
| run: docker pull ${{ env.GENAI_BENCH_IMAGE }} | |
| - name: Run Go bindings benchmark | |
| run: | | |
| bash scripts/ci_killall_sglang.sh "nuke_gpus" | |
| export CGO_LDFLAGS="-L$(pwd)/bindings/golang/target/release" | |
| export LD_LIBRARY_PATH="$(pwd)/bindings/golang/target/release:$LD_LIBRARY_PATH" | |
| SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/models" \ | |
| pytest e2e_test/benchmarks/test_go_bindings_perf.py -s -vv | |
| - name: Upload benchmark results | |
| if: always() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: genai-bench-results-go-bindings | |
| path: benchmark_go_bindings/ | |
| # Aggregate gate: always runs after the listed jobs and exits non-zero if any | |
| # of them reported `failure`. go-bindings-benchmark and summarize-benchmarks | |
| # are not part of the gate. | |
| # NOTE(review): only the literal result "failure" is checked, so `skipped` | |
| # and `cancelled` dependencies let this job pass. Skips are expected for | |
| # path-gated jobs; confirm that passing on `cancelled` is intended. | |
| finish: | |
| needs: [pre-commit, python-lint, grpc-proto-build-check, build-wheel, python-unit-tests, unit-tests, benchmarks, e2e-1gpu-chat, e2e-1gpu-completions, e2e-1gpu-embeddings, e2e-1gpu-gateway, e2e-1gpu-responses, e2e-2gpu-pd, e2e-4gpu-chat, e2e-4gpu-gateway, e2e-vendor, go-unit-tests, go-bindings-e2e] | |
| if: always() | |
| runs-on: k8s-runner-cpu | |
| # This job needs no token permissions at all. | |
| permissions: {} | |
| steps: | |
| - name: Check CI result | |
| run: | | |
| if [[ "${{ needs.pre-commit.result }}" == "failure" || \ | |
| "${{ needs.python-lint.result }}" == "failure" || \ | |
| "${{ needs.grpc-proto-build-check.result }}" == "failure" || \ | |
| "${{ needs.build-wheel.result }}" == "failure" || \ | |
| "${{ needs.python-unit-tests.result }}" == "failure" || \ | |
| "${{ needs.unit-tests.result }}" == "failure" || \ | |
| "${{ needs.benchmarks.result }}" == "failure" || \ | |
| "${{ needs.e2e-1gpu-chat.result }}" == "failure" || \ | |
| "${{ needs.e2e-1gpu-completions.result }}" == "failure" || \ | |
| "${{ needs.e2e-1gpu-embeddings.result }}" == "failure" || \ | |
| "${{ needs.e2e-1gpu-gateway.result }}" == "failure" || \ | |
| "${{ needs.e2e-1gpu-responses.result }}" == "failure" || \ | |
| "${{ needs.e2e-2gpu-pd.result }}" == "failure" || \ | |
| "${{ needs.e2e-4gpu-chat.result }}" == "failure" || \ | |
| "${{ needs.e2e-4gpu-gateway.result }}" == "failure" || \ | |
| "${{ needs.e2e-vendor.result }}" == "failure" || \ | |
| "${{ needs.go-unit-tests.result }}" == "failure" || \ | |
| "${{ needs.go-bindings-e2e.result }}" == "failure" ]]; then | |
| echo "One or more jobs failed" | |
| exit 1 | |
| else | |
| echo "All jobs completed successfully" | |
| fi | |
| # Downloads the benchmark results artifact uploaded by the benchmarks job and | |
| # runs the summary script over it. Runs only when benchmarks succeeded. | |
| summarize-benchmarks: | |
| needs: [benchmarks] | |
| runs-on: k8s-runner-cpu | |
| if: success() | |
| permissions: | |
| contents: read | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v6 | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.13" | |
| # No `path` is given, so the artifact is extracted into the workspace root, | |
| # which is the directory handed to summarize.py below. | |
| - name: Download gateway benchmark results | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: genai-bench-results-all-policies | |
| - name: Create benchmark summary | |
| run: python3 e2e_test/benchmarks/summarize.py . | |