Skip to content

fix(grpc-proto): lazy-load generated re-exports #7448

fix(grpc-proto): lazy-load generated re-exports

fix(grpc-proto): lazy-load generated re-exports #7448

Workflow file for this run

name: PR Test (SMG)

# Triggers: pushes and pull requests targeting main, plus manual dispatch.
# Changes limited to docs/, mkdocs.yml or root-level Markdown files are ignored.
on:
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - 'mkdocs.yml'
      - '*.md'
  pull_request:
    branches:
      - main
    types:
      - opened
      - synchronize
      - reopened
    paths-ignore:
      - 'docs/**'
      - 'mkdocs.yml'
      - '*.md'
  workflow_dispatch:

# Default token scope for every job; jobs may narrow or extend it.
permissions:
  contents: read

# Pull requests share one group per PR number, so a newer run cancels the
# superseded one. Every other event keys the group on the commit SHA and never
# cancels, so each commit on main gets its own complete run.
concurrency:
  group: gateway-tests-${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || github.sha }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

# Workflow-wide environment shared by all jobs.
env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: 'true'
  GENAI_BENCH_IMAGE: ghcr.io/moirai-internal/genai-bench:0.0.4

jobs:
pre-commit:
  # Runs the repository's pre-commit hooks against every tracked file.
  permissions:
    contents: read
  runs-on: k8s-runner-cpu
  steps:
    - uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.13'
    - name: Install pre-commit
      run: pip install pre-commit
    - name: Run pre-commit checks
      # SKIP lists the hook ids that pre-commit must not run in this job.
      run: pre-commit run --all-files --show-diff-on-failure
      env:
        SKIP: rustfmt,clippy,no-commit-to-branch,branch-name-check,dco-check,no-ai-co-author
python-lint:
  # Python static checks: ruff lint, ruff format verification and mypy.
  permissions:
    contents: read
  runs-on: k8s-runner-cpu
  steps:
    - uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.13'
    - name: Install linting tools
      run: pip install ruff mypy
    # ruff lints and format-checks the same three source trees.
    - name: Ruff check
      run: ruff check e2e_test/ bindings/python/ scripts/
    - name: Ruff format check
      run: ruff format --check e2e_test/ bindings/python/ scripts/
    # mypy runs once per tree, both times with the shared mypy.ini.
    - name: Mypy (e2e_test)
      run: mypy e2e_test/ --config-file mypy.ini
    - name: Mypy (bindings/python)
      run: mypy bindings/python/ --config-file mypy.ini
grpc-proto-build-check:
  # Builds the smg_grpc_proto Python package from the .proto sources, installs
  # the resulting wheel and smoke-tests the generated modules.
  permissions:
    contents: read
  runs-on: k8s-runner-cpu
  steps:
    - uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.12'
    - name: Copy protos and build
      # NOTE(review): `rm -f` without `-r` cannot remove a directory, so the
      # checked-in `proto` path is presumably a file or symlink that gets
      # replaced by a real directory here — confirm.
      run: |
        rm -f crates/grpc_client/python/smg_grpc_proto/proto
        mkdir -p crates/grpc_client/python/smg_grpc_proto/proto
        cp crates/grpc_client/proto/*.proto crates/grpc_client/python/smg_grpc_proto/proto/
        pip install build grpcio-tools
        rm -rf crates/grpc_client/python/dist/
        cd crates/grpc_client/python && python -m build
    - name: Test import
      # Imports two generated modules from the installed wheel and pins the
      # TensorData layout: no `data` field, and fixed field numbers for
      # shape/dtype/inline/shm/remote.
      run: |
        pip install crates/grpc_client/python/dist/*.whl
        python - <<'PY'
        from smg_grpc_proto import sglang_scheduler_pb2, tokenspeed_scheduler_pb2
        fields = tokenspeed_scheduler_pb2.TensorData.DESCRIPTOR.fields_by_name
        assert "data" not in fields
        assert fields["shape"].number == 1
        assert fields["dtype"].number == 2
        assert fields["inline"].number == 3
        assert fields["shm"].number == 4
        assert fields["remote"].number == 5
        print("OK")
        PY
build-wheel:
  # CPU-only Rust/wheel/Go-FFI/WASM compile — no GPU needed. Runs on the CPU
  # runner pool (which also tolerates the nvidia.com/gpu taint, so it can use
  # the abundant stranded CPU on GPU nodes) instead of competing for scarce
  # GPU runners.
  #
  # Produces four artifacts consumed by later jobs: the Python wheel, the Go
  # FFI library, the generated Python client types and the WASM test fixtures.
  runs-on: k8s-runner-cpu
  permissions:
    contents: read
  steps:
    - uses: actions/checkout@v6
    # No wheel-output cache here. It was keyed on a hash of every crate's
    # source, so it missed on essentially every code PR (all-or-nothing), and
    # on a miss it *masked* the incremental compilation cache by skipping the
    # build entirely — which also meant rust-cache rarely got populated. The
    # build now always runs and leans on rust-cache + sccache (set up in
    # ./.github/actions/setup-rust), which degrade gracefully: with the
    # dependency graph cached, only changed crates recompile.
    - name: Setup Rust
      uses: ./.github/actions/setup-rust
    - name: Build Python wheel and Go FFI library
      run: |
        rm -rf bindings/python/dist/
        bash scripts/ci_setup_python_venv.sh
        bash scripts/ci_build_wheel.sh
    - name: Generate Python client types
      # Emits the OpenAPI document with the openapi-gen crate, turns it into
      # pydantic v2 models, then rewrites `class X(Enum)` to `class X(str, Enum)`.
      run: |
        source "$HOME/.cargo/env"
        mkdir -p clients/openapi
        cargo run -p openapi-gen -- clients/openapi/smg-openapi.yaml
        pip install 'datamodel-code-generator==0.54.0'
        datamodel-codegen \
          --input clients/openapi/smg-openapi.yaml \
          --input-file-type openapi \
          --output clients/python/smg_client/types/_generated.py \
          --output-model-type pydantic_v2.BaseModel \
          --use-annotated \
          --field-constraints \
          --target-python-version 3.10 \
          --collapse-root-models \
          --use-standard-collections \
          --use-union-operator
        sed -i 's/class \(.*\)(Enum):/class \1(str, Enum):/' clients/python/smg_client/types/_generated.py
    - name: Build WASM test fixtures
      run: |
        source "$HOME/.cargo/env"
        bash crates/wasm/tests/fixtures/build_fixtures.sh
    # The next three artifacts are downloaded unconditionally by downstream
    # jobs, so a missing file fails this job right here instead of surfacing
    # later as an unrelated-looking download error.
    - name: Upload wheel artifact
      uses: actions/upload-artifact@v7
      with:
        name: smg-wheel
        path: bindings/python/dist/*.whl
        retention-days: 1
        if-no-files-found: error
    - name: Upload Go FFI library artifact
      uses: actions/upload-artifact@v7
      with:
        name: go-ffi-library
        path: bindings/golang/target/release/libsmg_go.*
        retention-days: 1
        if-no-files-found: error
    - name: Upload Python client types
      uses: actions/upload-artifact@v7
      with:
        name: python-client-types
        path: clients/python/smg_client/types/_generated.py
        retention-days: 1
        if-no-files-found: error
    # WASM fixtures stay optional: consumers download them with
    # continue-on-error, so an empty match is deliberately ignored.
    - name: Upload WASM test fixtures
      uses: actions/upload-artifact@v7
      with:
        name: wasm-test-fixtures
        path: crates/wasm/tests/fixtures/*.wasm
        retention-days: 1
        if-no-files-found: ignore
    - name: Show sccache stats
      if: always()
      run: sccache --show-stats
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: "3.12"
    - name: Test wheel install
      # Smoke test: the package imports, the Rust extension loads, and the
      # launch_router entry point answers --help.
      run: |
        pip install bindings/python/dist/*.whl
        python3 -c "import smg; print('Python package: OK')"
        python3 -c "from smg.smg_rs import Router; print('Rust extension: OK')"
        python3 -m smg.launch_router --help > /dev/null && echo "Entry point: OK"
python-unit-tests:
  # Installs the wheel built by build-wheel and runs the Python binding tests
  # (with an 80% coverage floor) followed by the grpc_servicer tests.
  needs: build-wheel
  permissions:
    contents: read
  runs-on: k8s-runner-cpu
  steps:
    - uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.13'
    - name: Download wheel artifact
      uses: actions/download-artifact@v8
      with:
        path: dist/
        name: smg-wheel
    - name: Install wheel
      run: pip install dist/*.whl
    - name: Run Python unit tests
      # --cov-fail-under=80 fails the step when coverage of `smg` drops below 80%.
      run: |
        cd bindings/python
        python3 -m pip install pytest pytest-cov pytest-xdist
        pytest -q tests --cov=smg --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80
    - name: Run grpc_servicer unit tests
      # Copies the .proto sources into the smg_grpc_proto package directory
      # and installs that package from source before running the tests.
      run: |
        rm -f crates/grpc_client/python/smg_grpc_proto/proto
        mkdir -p crates/grpc_client/python/smg_grpc_proto/proto
        cp crates/grpc_client/proto/*.proto crates/grpc_client/python/smg_grpc_proto/proto/
        pip install ./crates/grpc_client/python
        pytest -q grpc_servicer/tests
unit-tests:
  # Rust checks: OpenCV-free cargo check, clippy, nightly rustfmt and
  # `cargo test`. On pull requests it runs only when the rust-ci path filter
  # matched; for every other event it runs whenever detect-changes succeeded.
  needs: [detect-changes]
  if: >-
    always()
    && !cancelled()
    && needs.detect-changes.result == 'success'
    && (github.event_name != 'pull_request'
    || needs.detect-changes.outputs.rust-ci == 'true')
  runs-on: k8s-runner-cpu
  permissions:
    contents: read
  steps:
    - uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: "3.13"
    - name: Setup Rust
      uses: ./.github/actions/setup-rust
    # Runs before OpenCV is installed, so these checks prove the default
    # feature set builds without it.
    - name: Verify default multimodal build does not require OpenCV
      run: |
        source "$HOME/.cargo/env"
        cargo check -p llm-multimodal
        cargo check --manifest-path bindings/python/Cargo.toml
        cargo check --manifest-path bindings/golang/Cargo.toml
    - name: Install OpenCV build dependencies
      run: AUTO_INSTALL=1 bash scripts/install_opencv.sh
    - name: Build WASM test fixtures
      run: |
        source "$HOME/.cargo/env"
        bash crates/wasm/tests/fixtures/build_fixtures.sh
    - name: Run lint
      run: |
        source "$HOME/.cargo/env"
        rustup component add clippy
        cargo clippy --all-targets --all-features -- -D warnings
    - name: Run fmt
      run: |
        source "$HOME/.cargo/env"
        # Install the nightly toolchain and its rustfmt component in a single
        # command. Adding the component first fails when nightly is not yet
        # installed, and naming the full host triple ties the step to x86_64
        # Linux runners.
        rustup toolchain install nightly --profile minimal --component rustfmt
        cargo +nightly fmt -- --check
    - name: Generate vision golden fixtures
      # torch/torchvision come from the CPU-only wheel index.
      run: |
        python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
        python -m pip install transformers pillow numpy scipy
        python crates/multimodal/scripts/generate_vision_golden.py
    - name: Run Rust tests
      timeout-minutes: 30
      run: |
        source "$HOME/.cargo/env"
        cargo test
    - name: Show sccache stats
      if: always()
      run: sccache --show-stats
# --- Benchmarks (standalone) ---
benchmarks:
  # Runs the e2e_test/benchmarks suite against the freshly built wheel on a
  # 4-GPU runner. The Go-bindings and nightly perf tests are excluded.
  needs: build-wheel
  timeout-minutes: 36
  permissions:
    contents: read
  runs-on: 4-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v6
    - name: Setup SGLang backend
      uses: ./.github/actions/setup-sglang
    - name: Download wheel artifact
      uses: actions/download-artifact@v8
      with:
        path: wheel/
        name: smg-wheel
    # The fixtures artifact may not exist (its upload ignores an empty match),
    # so a failed download must not fail the job.
    - name: Download WASM test fixtures
      continue-on-error: true
      uses: actions/download-artifact@v8
      with:
        path: crates/wasm/tests/fixtures/
        name: wasm-test-fixtures
    - name: Download Python client types
      uses: actions/download-artifact@v8
      with:
        path: clients/python/smg_client/types/
        name: python-client-types
    - name: Install wheel and test dependencies
      # Drop any smg already present before installing the new wheel.
      run: |
        pip uninstall -y smg || true
        pip install wheel/*.whl
        bash scripts/ci_install_e2e_deps.sh
    - name: Pull genai-bench image
      run: docker pull ${{ env.GENAI_BENCH_IMAGE }}
    - name: Run benchmarks
      run: |
        mkdir -p benchmark-logs
        bash scripts/ci_killall_sglang.sh "nuke_gpus"
        SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 pytest e2e_test/benchmarks \
          --ignore=e2e_test/benchmarks/test_go_bindings_perf.py \
          --ignore=e2e_test/benchmarks/test_nightly_perf.py \
          -s -vv
      env:
        ROUTER_LOCAL_MODEL_PATH: /models
        E2E_LOG_DIR: benchmark-logs
        GENAI_BENCH_TEST_TIMEOUT: '480'
    # Results are uploaded even after a failure; summarize-benchmarks
    # downloads this artifact.
    - name: Upload benchmark results
      if: always()
      uses: actions/upload-artifact@v7
      with:
        path: benchmark_**/
        name: genai-bench-results-all-policies
    # Worker logs are collected only when the job failed or was cancelled.
    - name: Worker failure diagnostics
      if: failure() || cancelled()
      run: bash scripts/ci_dump_worker_logs.sh benchmark-logs benchmark-worker-logs
    - name: Upload worker logs
      if: failure() || cancelled()
      uses: actions/upload-artifact@v7
      with:
        path: benchmark-logs/
        name: benchmark-worker-logs
        retention-days: 7
        if-no-files-found: ignore
detect-changes:
  # Classifies the changed paths with dorny/paths-filter and exposes one
  # 'true'/'false' output per filter. Downstream jobs consult these outputs on
  # pull requests to decide whether they need to run.
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    common: ${{ steps.filter.outputs.common }}
    chat-completions: ${{ steps.filter.outputs.chat-completions }}
    completions: ${{ steps.filter.outputs.completions }}
    agentic: ${{ steps.filter.outputs.agentic }}
    embeddings: ${{ steps.filter.outputs.embeddings }}
    go-bindings: ${{ steps.filter.outputs.go-bindings }}
    rust-ci: ${{ steps.filter.outputs.rust-ci }}
  steps:
    - uses: actions/checkout@v6
    - uses: dorny/paths-filter@v4
      id: filter
      # Filter notes:
      # - `agentic` includes e2e_test/realtime/** because the e2e-vendor job,
      #   gated on common || agentic, runs that directory; without the entry
      #   a PR touching only those tests would skip them.
      # - `rust-ci` includes scripts/install_opencv.sh because the unit-tests
      #   job, gated on rust-ci, executes that script.
      with:
        filters: |
          common:
            - 'model_gateway/**'
            - 'crates/protocols/**'
            - 'bindings/**'
            - 'e2e_test/conftest.py'
            - 'e2e_test/infra/**'
            - 'e2e_test/fixtures/**'
            - 'Cargo.lock'
            - '.github/actions/**'
            - '.github/workflows/pr-test-rust.yml'
            - '.github/workflows/e2e-gpu-job.yml'
            - 'scripts/ci_setup_python_venv.sh'
            - 'scripts/ci_install_sglang.sh'
            - 'scripts/ci_install_vllm.sh'
            - 'scripts/ci_install_tokenspeed.sh'
            - 'scripts/ci_install_e2e_deps.sh'
            - 'scripts/ci_killall_sglang.sh'
            - 'scripts/ci_build_wheel.sh'
            - 'crates/tokenizer/**'
            - 'crates/tool_parser/**'
          chat-completions:
            - 'crates/reasoning_parser/**'
            - 'crates/multimodal/**'
            - 'crates/grpc_client/**'
            - 'grpc_servicer/**'
            - 'e2e_test/chat_completions/**'
            - 'e2e_test/router/**'
            - 'scripts/ci_install_vllm.sh'
            - 'scripts/ci_install_trtllm.sh'
            - 'scripts/ci_install_tokenspeed.sh'
          agentic:
            - 'crates/mcp/**'
            - 'crates/data_connector/**'
            - 'e2e_test/responses/**'
            - 'e2e_test/messages/**'
            - 'e2e_test/realtime/**'
            - 'scripts/ci_agentic_svc_deps.sh'
            - 'scripts/oracle_flyway/**'
          completions:
            - 'crates/grpc_client/**'
            - 'grpc_servicer/**'
            - 'e2e_test/completions/**'
          embeddings:
            - 'e2e_test/embeddings/**'
          go-bindings:
            - 'e2e_test/bindings_go/**'
          rust-ci:
            - 'Cargo.toml'
            - 'Cargo.lock'
            - 'rustfmt.toml'
            - 'clippy.toml'
            - 'crates/**'
            - 'test_support/**'
            - 'model_gateway/**'
            - 'bindings/python/Cargo.toml'
            - 'bindings/python/build.rs'
            - 'bindings/python/src/**'
            - 'bindings/golang/Cargo.toml'
            - 'bindings/golang/src/**'
            - 'clients/rust/**'
            - 'clients/openapi-gen/**'
            - '.cargo/**'
            - '.github/actions/setup-rust/**'
            - 'scripts/ci_install_rust.sh'
            - 'scripts/install_opencv.sh'
            - '.github/workflows/pr-test-rust.yml'
            - 'examples/wasm/wasm-guest-storage-hook/**'
            - 'examples/wasm/wasm-guest-storage-hook-passthrough/**'
# --- GPU E2E: organized by GPU tier + API, engine as matrix axis ---
# === 1 GPU ===
e2e-1gpu-chat:
  # Chat-completions e2e on a single GPU, one matrix entry per engine.
  # Requires a successful build-wheel. On pull requests it also requires the
  # `common` or `chat-completions` path filter to have matched.
  name: e2e-1gpu-chat (${{ matrix.engine }})
  needs:
    - build-wheel
    - detect-changes
  if: >-
    always()
    && !cancelled()
    && needs.build-wheel.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.chat-completions == 'true')))
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: ${{ matrix.engine }}
    gpu_tier: '1'
    runner: 1-gpu-h100
    timeout: ${{ matrix.timeout }}
    test_timeout: ${{ matrix.test_timeout }}
    # Entries without their own test_dirs fall back to the chat suite.
    test_dirs: ${{ matrix.test_dirs || 'e2e_test/chat_completions' }}
  # Now also runs the previously-2-GPU chat tests (gpt-oss-20b,
  # Qwen2.5-14B) since they're tp=1.
  strategy:
    fail-fast: false
    matrix:
      include:
        - engine: sglang
          timeout: 36
          test_timeout: 28
        - engine: vllm
          timeout: 24
          test_timeout: 18
        - engine: trtllm
          timeout: 32
          test_timeout: 18
        # tokenspeed builds from source (~30m cold), so keep the job
        # timeout generous even though the test step is short.
        # Admin-ops e2e (flush/profile) piggybacks here because this is
        # the only lane with tokenspeed installed — e2e-1gpu-gateway
        # installs sglang/vllm only. The engine marker filter keeps the
        # rest of e2e_test/router out of this job.
        - engine: tokenspeed
          timeout: 50
          test_timeout: 18
          test_dirs: e2e_test/chat_completions e2e_test/router/test_admin_ops.py
e2e-1gpu-completions:
  # Completions e2e on a single GPU for sglang and vllm. Requires a successful
  # build-wheel. On pull requests it also requires the `common` or
  # `completions` path filter to have matched.
  name: e2e-1gpu-completions (${{ matrix.engine }})
  needs:
    - build-wheel
    - detect-changes
  if: >-
    always()
    && !cancelled()
    && needs.build-wheel.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.completions == 'true')))
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: ${{ matrix.engine }}
    gpu_tier: '1'
    runner: 1-gpu-h100
    timeout: ${{ matrix.timeout }}
    test_dirs: e2e_test/completions
  strategy:
    fail-fast: false
    matrix:
      include:
        - engine: sglang
          timeout: 20
        - engine: vllm
          timeout: 20
e2e-1gpu-embeddings:
  # Embeddings e2e on a single GPU for sglang and vllm. Requires a successful
  # build-wheel. On pull requests it also requires the `common` or
  # `embeddings` path filter to have matched.
  name: e2e-1gpu-embeddings (${{ matrix.engine }})
  needs:
    - build-wheel
    - detect-changes
  if: >-
    always()
    && !cancelled()
    && needs.build-wheel.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.embeddings == 'true')))
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: ${{ matrix.engine }}
    gpu_tier: '1'
    runner: 1-gpu-h100
    timeout: ${{ matrix.timeout }}
    test_dirs: e2e_test/embeddings
    # Extra dependency passed to the reusable workflow for this suite.
    extra_deps: 'sentence-transformers'
  strategy:
    fail-fast: false
    matrix:
      include:
        - engine: sglang
          timeout: 20
        - engine: vllm
          timeout: 20
e2e-1gpu-gateway:
  # Router e2e on a single GPU for sglang and vllm, with test_pd_mmlu.py
  # excluded. Requires a successful build-wheel. On pull requests it also
  # requires the `common` or `chat-completions` path filter to have matched.
  name: e2e-1gpu-gateway (${{ matrix.engine }})
  needs:
    - build-wheel
    - detect-changes
  if: >-
    always()
    && !cancelled()
    && needs.build-wheel.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.chat-completions == 'true')))
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: ${{ matrix.engine }}
    gpu_tier: '1'
    runner: 1-gpu-h100
    timeout: ${{ matrix.timeout }}
    test_dirs: e2e_test/router
    test_filter: '--ignore=e2e_test/router/test_pd_mmlu.py'
  strategy:
    fail-fast: false
    matrix:
      include:
        - engine: sglang
          timeout: 20
        - engine: vllm
          timeout: 20
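# === 2 GPU ===
# e2e-2gpu-chat was retired: all chat_completions tests are tp=1 now
# and run under e2e-1gpu-chat above. Note that e2e-1gpu-responses, defined
# next, is still a 1-GPU job; the 2-GPU tier starts at e2e-2gpu-pd.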
e2e-1gpu-responses:
  # Responses e2e on a single GPU, sglang only, with agentic service
  # dependencies enabled. Requires a successful build-wheel. On pull requests
  # it also requires the `common` or `agentic` path filter to have matched.
  needs:
    - build-wheel
    - detect-changes
  if: >-
    always()
    && !cancelled()
    && needs.build-wheel.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.agentic == 'true')))
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: sglang
    gpu_tier: '1'
    runner: 1-gpu-h100
    timeout: 28
    test_timeout: 20
    test_dirs: e2e_test/responses
    setup_agentic_deps: true
e2e-2gpu-pd:
  # Router e2e on the 2-GPU tier. Runs only after e2e-1gpu-gateway succeeded.
  # On pull requests it also requires the `common` or `chat-completions` path
  # filter to have matched. The job name gains a `-<kv_backend>` suffix when
  # the matrix entry sets one.
  name: e2e-2gpu-pd (${{ matrix.engine }}${{ matrix.kv_backend && format('-{0}', matrix.kv_backend) || '' }})
  needs:
    - e2e-1gpu-gateway
    - detect-changes
  if: >-
    always()
    && !cancelled()
    && needs.e2e-1gpu-gateway.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.chat-completions == 'true')))
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: ${{ matrix.engine }}
    gpu_tier: '2'
    runner: 2-gpu-h100
    timeout: ${{ matrix.timeout }}
    test_dirs: e2e_test/router
    # Entries that do not set kv_backend pass 'nixl'.
    vllm_kv_backend: ${{ matrix.kv_backend || 'nixl' }}
  strategy:
    fail-fast: false
    matrix:
      include:
        - engine: sglang
          timeout: 30
        - engine: vllm
          timeout: 30
        - engine: vllm
          kv_backend: mooncake
          timeout: 30
# === 4 GPU ===
e2e-4gpu-chat:
  # Chat-completions e2e on the 4-GPU tier. It has no `if` of its own, so the
  # default condition applies and it runs only when e2e-1gpu-chat succeeded.
  name: e2e-4gpu-chat (${{ matrix.engine }})
  needs:
    - e2e-1gpu-chat
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: ${{ matrix.engine }}
    gpu_tier: '4'
    runner: 4-gpu-h100
    timeout: ${{ matrix.timeout }}
    test_dirs: e2e_test/chat_completions
  strategy:
    fail-fast: false
    matrix:
      include:
        - engine: sglang
          timeout: 30
        - engine: vllm
          timeout: 30
        - engine: trtllm
          timeout: 45
e2e-4gpu-gateway:
  # Router e2e on the 4-GPU tier, sglang only, limited by `-k` to
  # TestIGWMixedWorkerClassification. Runs only when e2e-1gpu-gateway succeeded.
  name: e2e-4gpu-gateway
  needs:
    - e2e-1gpu-gateway
  uses: ./.github/workflows/e2e-gpu-job.yml
  secrets: inherit
  with:
    engine: sglang
    gpu_tier: '4'
    runner: 4-gpu-h100
    timeout: 25
    test_dirs: e2e_test/router
    test_filter: '-k TestIGWMixedWorkerClassification'
# --- Vendor E2E: CPU-only cloud backend tests ---
e2e-vendor:
  # Vendor e2e tests on CPU runners, one matrix entry per vendor/test suite.
  # Requires a successful build-wheel. On pull requests it also requires the
  # `common` or `agentic` path filter to have matched.
  name: ${{ matrix.name }}
  needs:
    - build-wheel
    - detect-changes
  if: >-
    always()
    && !cancelled()
    && needs.build-wheel.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.agentic == 'true')))
  runs-on: k8s-runner-cpu
  timeout-minutes: ${{ matrix.timeout }}
  permissions:
    contents: read
  env:
    E2E_VENDOR: ${{ matrix.vendor }}
    E2E_GPU_TIER: '0'
    SHOW_ROUTER_LOGS: '1'
  strategy:
    fail-fast: false
    matrix:
      # setup_agentic_deps enables the Oracle/Brave steps further down;
      # entries that omit it skip those steps.
      include:
        - name: anthropic-messages
          vendor: anthropic
          test_path: e2e_test/messages
          timeout: 20
          setup_agentic_deps: true
        - name: openai-responses
          vendor: openai
          test_path: e2e_test/responses
          timeout: 30
          setup_agentic_deps: true
        - name: openai-realtime
          vendor: openai
          test_path: e2e_test/realtime
          timeout: 20
        - name: xai-responses
          vendor: xai
          test_path: e2e_test/responses
          timeout: 15
          setup_agentic_deps: true
  steps:
    - name: Checkout code
      uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.13'
    - name: Check shared services
      if: matrix.setup_agentic_deps
      run: bash scripts/ci_agentic_svc_deps.sh check --oracle oracle-db --brave brave-search-mcp
    - name: Download wheel artifact
      uses: actions/download-artifact@v8
      with:
        path: wheel/
        name: smg-wheel
    # The fixtures artifact may not exist, so a failed download is tolerated.
    - name: Download WASM test fixtures
      continue-on-error: true
      uses: actions/download-artifact@v8
      with:
        path: crates/wasm/tests/fixtures/
        name: wasm-test-fixtures
    - name: Download Python client types
      uses: actions/download-artifact@v8
      with:
        path: clients/python/smg_client/types/
        name: python-client-types
    - name: Install wheel and test dependencies
      run: |
        pip uninstall -y smg || true
        pip install wheel/*.whl
        bash scripts/ci_install_e2e_deps.sh
    - name: Setup Oracle
      if: matrix.setup_agentic_deps
      run: |
        bash scripts/ci_agentic_svc_deps.sh setup-oracle-client
        bash scripts/ci_agentic_svc_deps.sh create-oracle-user oracle-db
        bash scripts/ci_agentic_svc_deps.sh create-oracle-flyway-user oracle-db
    - name: Run E2E tests
      # BRAVE_MCP_HOST is empty for entries without agentic dependencies.
      run: |
        ROUTER_LOCAL_MODEL_PATH="/models" pytest ${{ matrix.test_path }} \
          -m "not external" \
          --reruns 2 --reruns-delay 5 \
          -s -vv
      env:
        BRAVE_MCP_HOST: ${{ matrix.setup_agentic_deps && 'brave-search-mcp' || '' }}
    # Runs even after a failure, and drops the users in the reverse order of
    # their creation.
    - name: Cleanup Oracle test users
      if: always() && matrix.setup_agentic_deps
      run: |
        bash scripts/ci_agentic_svc_deps.sh cleanup-oracle-flyway-user oracle-db
        bash scripts/ci_agentic_svc_deps.sh cleanup-oracle-user oracle-db
go-unit-tests:
  # Runs the Go binding unit tests against the FFI library built by build-wheel.
  name: go-unit-tests
  needs: build-wheel
  runs-on: k8s-runner-cpu
  timeout-minutes: 15
  permissions:
    contents: read
  steps:
    - name: Checkout code
      uses: actions/checkout@v6
    - name: Setup Go
      uses: actions/setup-go@v6
      with:
        go-version: '1.24'
        cache: true
        cache-dependency-path: bindings/golang/go.sum
    - name: Install build tools
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential
    - name: Download Go FFI library
      uses: actions/download-artifact@v8
      with:
        name: go-ffi-library
        path: bindings/golang/target/release/
    # Fails fast when the artifact did not contain the expected library.
    - name: Verify Go FFI library
      run: ls -la bindings/golang/target/release/libsmg_go.*
    - name: Run Go unit tests
      run: |
        cd bindings/golang
        export CGO_ENABLED=1
        export CGO_LDFLAGS="-L$(pwd)/target/release"
        # Append the previous value only when it is non-empty. A bare trailing
        # ':' would add an empty entry, which the dynamic loader treats as the
        # current working directory.
        export LD_LIBRARY_PATH="$(pwd)/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        go test -v ./...
go-bindings-e2e:
  # End-to-end tests of the Go bindings on a single-GPU runner. Requires a
  # successful build-wheel. On pull requests it also requires the `common` or
  # `go-bindings` path filter to have matched.
  name: go-bindings-e2e
  needs: [build-wheel, detect-changes]
  if: >-
    always()
    && !cancelled()
    && needs.build-wheel.result == 'success'
    && (github.event_name != 'pull_request'
    || (needs.detect-changes.result == 'success'
    && (needs.detect-changes.outputs.common == 'true'
    || needs.detect-changes.outputs.go-bindings == 'true')))
  runs-on: 1-gpu-h100
  timeout-minutes: 20
  permissions:
    contents: read
  steps:
    - name: Checkout code
      uses: actions/checkout@v6
    - name: Setup SGLang backend
      uses: ./.github/actions/setup-sglang
    - name: Setup Go
      uses: actions/setup-go@v6
      with:
        go-version: '1.24'
        cache: true
        cache-dependency-path: bindings/golang/go.sum
    - name: Download Go FFI library
      uses: actions/download-artifact@v8
      with:
        name: go-ffi-library
        path: bindings/golang/target/release/
    # Fails fast when the artifact did not contain the expected library.
    - name: Verify Go FFI library
      run: ls -la bindings/golang/target/release/libsmg_go.*
    - name: Download wheel artifact
      uses: actions/download-artifact@v8
      with:
        name: smg-wheel
        path: wheel/
    - name: Download Python client types
      uses: actions/download-artifact@v8
      with:
        name: python-client-types
        path: clients/python/smg_client/types/
    - name: Install wheel and test dependencies
      run: |
        pip uninstall -y smg || true
        pip install wheel/*.whl
        bash scripts/ci_install_e2e_deps.sh
    - name: Run Go OAI server E2E tests
      run: |
        bash scripts/ci_killall_sglang.sh "nuke_gpus"
        export CGO_LDFLAGS="-L$(pwd)/bindings/golang/target/release"
        # Append the previous value only when it is non-empty. A bare trailing
        # ':' would add an empty entry, which the dynamic loader treats as the
        # current working directory.
        export LD_LIBRARY_PATH="$(pwd)/bindings/golang/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/models" \
          E2E_LOG_DIR=e2e-logs \
          pytest --reruns 2 --reruns-delay 5 e2e_test/bindings_go -s -vv
    # Worker logs are collected only when the job failed or was cancelled.
    - name: Worker failure diagnostics
      if: failure() || cancelled()
      run: bash scripts/ci_dump_worker_logs.sh e2e-logs "e2e-worker-logs-go-bindings"
    - name: Upload worker logs
      if: failure() || cancelled()
      uses: actions/upload-artifact@v7
      with:
        name: e2e-worker-logs-go-bindings
        path: e2e-logs/
        retention-days: 7
        if-no-files-found: ignore
go-bindings-benchmark:
  # Go bindings performance benchmark. Currently switched off by `if: false`
  # and not listed in the `finish` job's needs.
  name: go-bindings-benchmark
  needs: build-wheel
  if: false # Disabled
  runs-on: k8s-runner-gpu
  timeout-minutes: 32
  permissions:
    contents: read
  steps:
    - name: Checkout code
      uses: actions/checkout@v6
    - name: Setup SGLang backend
      uses: ./.github/actions/setup-sglang
    - name: Setup Go
      uses: actions/setup-go@v6
      with:
        go-version: '1.24'
        cache: true
        cache-dependency-path: bindings/golang/go.sum
    - name: Download Go FFI library
      uses: actions/download-artifact@v8
      with:
        name: go-ffi-library
        path: bindings/golang/target/release/
    # Fails fast when the artifact did not contain the expected library.
    - name: Verify Go FFI library
      run: ls -la bindings/golang/target/release/libsmg_go.*
    - name: Download wheel artifact
      uses: actions/download-artifact@v8
      with:
        name: smg-wheel
        path: wheel/
    - name: Install wheel
      run: |
        pip uninstall -y smg || true
        pip install wheel/*.whl
    - name: Install test dependencies
      run: |
        python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy
    - name: Pull genai-bench image
      run: docker pull ${{ env.GENAI_BENCH_IMAGE }}
    - name: Run Go bindings benchmark
      run: |
        bash scripts/ci_killall_sglang.sh "nuke_gpus"
        export CGO_LDFLAGS="-L$(pwd)/bindings/golang/target/release"
        # Append the previous value only when it is non-empty. A bare trailing
        # ':' would add an empty entry, which the dynamic loader treats as the
        # current working directory.
        export LD_LIBRARY_PATH="$(pwd)/bindings/golang/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/models" \
          pytest e2e_test/benchmarks/test_go_bindings_perf.py -s -vv
    - name: Upload benchmark results
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: genai-bench-results-go-bindings
        path: benchmark_go_bindings/
finish:
  # Aggregation gate: always runs, and fails when any job listed in `needs`
  # ended with result `failure`. Skipped or cancelled jobs do not fail it.
  if: always()
  needs:
    - pre-commit
    - python-lint
    - grpc-proto-build-check
    - build-wheel
    - python-unit-tests
    - unit-tests
    - benchmarks
    - e2e-1gpu-chat
    - e2e-1gpu-completions
    - e2e-1gpu-embeddings
    - e2e-1gpu-gateway
    - e2e-1gpu-responses
    - e2e-2gpu-pd
    - e2e-4gpu-chat
    - e2e-4gpu-gateway
    - e2e-vendor
    - go-unit-tests
    - go-bindings-e2e
  runs-on: k8s-runner-cpu
  # This job needs no token permissions at all.
  permissions: {}
  steps:
    - name: Check CI result
      env:
        # 'true' when at least one needed job has result `failure`.
        ANY_NEED_FAILED: ${{ contains(needs.*.result, 'failure') }}
      run: |
        if [[ "$ANY_NEED_FAILED" != "true" ]]; then
          echo "All jobs completed successfully"
        else
          echo "One or more jobs failed"
          exit 1
        fi
summarize-benchmarks:
  # Downloads the benchmark results artifact into the workspace and runs the
  # summary script over it. Runs only when the benchmarks job succeeded.
  needs:
    - benchmarks
  if: success()
  permissions:
    contents: read
  runs-on: k8s-runner-cpu
  steps:
    - name: Checkout code
      uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.13'
    # No `path` is given, so the artifact lands in the current workspace, which
    # is the directory passed to the script below.
    - name: Download gateway benchmark results
      uses: actions/download-artifact@v8
      with:
        name: genai-bench-results-all-policies
    - name: Create benchmark summary
      run: python3 e2e_test/benchmarks/summarize.py .