fix(grpc-proto): lazy-load generated re-exports #7448

Workflow file for this run

.github/workflows/pr-test-rust.yml at d819c45

	name: PR Test (SMG)

	on:
	push:
	branches: [ main ]
	paths-ignore:
	- "docs/**"
	- "mkdocs.yml"
	- "*.md"
	pull_request:
	branches: [ main ]
	types: [opened, synchronize, reopened]
	paths-ignore:
	- "docs/**"
	- "mkdocs.yml"
	- "*.md"
	workflow_dispatch:

	permissions:
	contents: read

	# Per-PR groups cancel superseded runs; pushes to main key on the commit SHA so
	# every commit on main gets its own run and is never cancelled by a later push.
	concurrency:
	group: gateway-tests-${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) \|\| github.sha }}
	cancel-in-progress: ${{ github.event_name == 'pull_request' }}

	env:
	RUSTC_WRAPPER: sccache
	SCCACHE_GHA_ENABLED: "true"
	GENAI_BENCH_IMAGE: ghcr.io/moirai-internal/genai-bench:0.0.4

	jobs:
	pre-commit:
	runs-on: k8s-runner-cpu
	permissions:
	contents: read
	steps:
	- uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.13"

	- name: Install pre-commit
	run: pip install pre-commit

	- name: Run pre-commit checks
	env:
	SKIP: rustfmt,clippy,no-commit-to-branch,branch-name-check,dco-check,no-ai-co-author
	run: pre-commit run --all-files --show-diff-on-failure

	python-lint:
	runs-on: k8s-runner-cpu
	permissions:
	contents: read
	steps:
	- uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.13"

	- name: Install linting tools
	run: pip install ruff mypy

	- name: Ruff check
	run: ruff check e2e_test/ bindings/python/ scripts/

	- name: Ruff format check
	run: ruff format --check e2e_test/ bindings/python/ scripts/

	- name: Mypy (e2e_test)
	run: mypy e2e_test/ --config-file mypy.ini

	- name: Mypy (bindings/python)
	run: mypy bindings/python/ --config-file mypy.ini

	grpc-proto-build-check:
	runs-on: k8s-runner-cpu
	permissions:
	contents: read
	steps:
	- uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.12"

	- name: Copy protos and build
	run: \|
	rm -f crates/grpc_client/python/smg_grpc_proto/proto
	mkdir -p crates/grpc_client/python/smg_grpc_proto/proto
	cp crates/grpc_client/proto/*.proto crates/grpc_client/python/smg_grpc_proto/proto/
	pip install build grpcio-tools
	rm -rf crates/grpc_client/python/dist/
	cd crates/grpc_client/python && python -m build

	- name: Test import
	run: \|
	pip install crates/grpc_client/python/dist/*.whl
	python - <<'PY'
	from smg_grpc_proto import sglang_scheduler_pb2, tokenspeed_scheduler_pb2

	fields = tokenspeed_scheduler_pb2.TensorData.DESCRIPTOR.fields_by_name
	assert "data" not in fields
	assert fields["shape"].number == 1
	assert fields["dtype"].number == 2
	assert fields["inline"].number == 3
	assert fields["shm"].number == 4
	assert fields["remote"].number == 5

	print("OK")
	PY

	build-wheel:
	# CPU-only Rust/wheel/Go-FFI/WASM compile — no GPU needed. Runs on the CPU
	# runner pool (which also tolerates the nvidia.com/gpu taint, so it can use
	# the abundant stranded CPU on GPU nodes) instead of competing for scarce
	# GPU runners.
	runs-on: k8s-runner-cpu
	permissions:
	contents: read
	steps:
	- uses: actions/checkout@v6

	# No wheel-output cache here. It was keyed on a hash of every crate's
	# source, so it missed on essentially every code PR (all-or-nothing), and
	# on a miss it masked the incremental compilation cache by skipping the
	# build entirely — which also meant rust-cache rarely got populated. The
	# build now always runs and leans on rust-cache + sccache (set up in
	# ./.github/actions/setup-rust), which degrade gracefully: with the
	# dependency graph cached, only changed crates recompile.
	- name: Setup Rust
	uses: ./.github/actions/setup-rust

	- name: Build Python wheel and Go FFI library
	run: \|
	rm -rf bindings/python/dist/
	bash scripts/ci_setup_python_venv.sh
	bash scripts/ci_build_wheel.sh

	- name: Generate Python client types
	run: \|
	source "$HOME/.cargo/env"
	mkdir -p clients/openapi
	cargo run -p openapi-gen -- clients/openapi/smg-openapi.yaml
	pip install 'datamodel-code-generator==0.54.0'
	datamodel-codegen \
	--input clients/openapi/smg-openapi.yaml \
	--input-file-type openapi \
	--output clients/python/smg_client/types/_generated.py \
	--output-model-type pydantic_v2.BaseModel \
	--use-annotated \
	--field-constraints \
	--target-python-version 3.10 \
	--collapse-root-models \
	--use-standard-collections \
	--use-union-operator
	sed -i 's/class $.*$(Enum):/class \1(str, Enum):/' clients/python/smg_client/types/_generated.py

	- name: Build WASM test fixtures
	run: \|
	source "$HOME/.cargo/env"
	bash crates/wasm/tests/fixtures/build_fixtures.sh

	- name: Upload wheel artifact
	uses: actions/upload-artifact@v7
	with:
	name: smg-wheel
	path: bindings/python/dist/*.whl
	retention-days: 1

	- name: Upload Go FFI library artifact
	uses: actions/upload-artifact@v7
	with:
	name: go-ffi-library
	path: bindings/golang/target/release/libsmg_go.*
	retention-days: 1

	- name: Upload Python client types
	uses: actions/upload-artifact@v7
	with:
	name: python-client-types
	path: clients/python/smg_client/types/_generated.py
	retention-days: 1

	- name: Upload WASM test fixtures
	uses: actions/upload-artifact@v7
	with:
	name: wasm-test-fixtures
	path: crates/wasm/tests/fixtures/*.wasm
	retention-days: 1
	if-no-files-found: ignore

	- name: Show sccache stats
	if: always()
	run: sccache --show-stats

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.12"

	- name: Test wheel install
	run: \|
	pip install bindings/python/dist/*.whl
	python3 -c "import smg; print('Python package: OK')"
	python3 -c "from smg.smg_rs import Router; print('Rust extension: OK')"
	python3 -m smg.launch_router --help > /dev/null && echo "Entry point: OK"

	python-unit-tests:
	needs: build-wheel
	runs-on: k8s-runner-cpu
	permissions:
	contents: read
	steps:
	- uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.13"

	- name: Download wheel artifact
	uses: actions/download-artifact@v8
	with:
	name: smg-wheel
	path: dist/

	- name: Install wheel
	run: pip install dist/*.whl

	- name: Run Python unit tests
	run: \|
	cd bindings/python
	python3 -m pip install pytest pytest-cov pytest-xdist
	pytest -q tests --cov=smg --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80

	- name: Run grpc_servicer unit tests
	run: \|
	rm -f crates/grpc_client/python/smg_grpc_proto/proto
	mkdir -p crates/grpc_client/python/smg_grpc_proto/proto
	cp crates/grpc_client/proto/*.proto crates/grpc_client/python/smg_grpc_proto/proto/
	pip install ./crates/grpc_client/python
	pytest -q grpc_servicer/tests

	unit-tests:
	needs: [detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.detect-changes.result == 'success'
	&& (
	github.event_name != 'pull_request'
	\|\| needs.detect-changes.outputs.rust-ci == 'true'
	)
	runs-on: k8s-runner-cpu
	permissions:
	contents: read
	steps:
	- uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.13"

	- name: Setup Rust
	uses: ./.github/actions/setup-rust

	- name: Verify default multimodal build does not require OpenCV
	run: \|
	source "$HOME/.cargo/env"
	cargo check -p llm-multimodal
	cargo check --manifest-path bindings/python/Cargo.toml
	cargo check --manifest-path bindings/golang/Cargo.toml

	- name: Install OpenCV build dependencies
	run: AUTO_INSTALL=1 bash scripts/install_opencv.sh

	- name: Build WASM test fixtures
	run: \|
	source "$HOME/.cargo/env"
	bash crates/wasm/tests/fixtures/build_fixtures.sh

	- name: Run lint
	run: \|
	source "$HOME/.cargo/env"
	rustup component add clippy
	cargo clippy --all-targets --all-features -- -D warnings

	- name: Run fmt
	run: \|
	source "$HOME/.cargo/env"
	rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
	rustup toolchain install nightly --profile minimal
	cargo +nightly fmt -- --check

	- name: Generate vision golden fixtures
	run: \|
	python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
	python -m pip install transformers pillow numpy scipy
	python crates/multimodal/scripts/generate_vision_golden.py

	- name: Run Rust tests
	timeout-minutes: 30
	run: \|
	source "$HOME/.cargo/env"
	cargo test

	- name: Show sccache stats
	if: always()
	run: sccache --show-stats

	# --- Benchmarks (standalone) ---

	benchmarks:
	needs: build-wheel
	runs-on: 4-gpu-h100
	timeout-minutes: 36
	permissions:
	contents: read
	steps:
	- name: Checkout code
	uses: actions/checkout@v6

	- name: Setup SGLang backend
	uses: ./.github/actions/setup-sglang

	- name: Download wheel artifact
	uses: actions/download-artifact@v8
	with:
	name: smg-wheel
	path: wheel/

	- name: Download WASM test fixtures
	uses: actions/download-artifact@v8
	with:
	name: wasm-test-fixtures
	path: crates/wasm/tests/fixtures/
	continue-on-error: true

	- name: Download Python client types
	uses: actions/download-artifact@v8
	with:
	name: python-client-types
	path: clients/python/smg_client/types/

	- name: Install wheel and test dependencies
	run: \|
	pip uninstall -y smg \|\| true
	pip install wheel/*.whl
	bash scripts/ci_install_e2e_deps.sh

	- name: Pull genai-bench image
	run: docker pull ${{ env.GENAI_BENCH_IMAGE }}

	- name: Run benchmarks
	env:
	ROUTER_LOCAL_MODEL_PATH: /models
	E2E_LOG_DIR: benchmark-logs
	GENAI_BENCH_TEST_TIMEOUT: "480"
	run: \|
	mkdir -p benchmark-logs
	bash scripts/ci_killall_sglang.sh "nuke_gpus"
	SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 pytest e2e_test/benchmarks \
	--ignore=e2e_test/benchmarks/test_go_bindings_perf.py \
	--ignore=e2e_test/benchmarks/test_nightly_perf.py \
	-s -vv

	- name: Upload benchmark results
	if: always()
	uses: actions/upload-artifact@v7
	with:
	name: genai-bench-results-all-policies
	path: benchmark_**/

	- name: Worker failure diagnostics
	if: failure() \|\| cancelled()
	run: bash scripts/ci_dump_worker_logs.sh benchmark-logs benchmark-worker-logs

	- name: Upload worker logs
	if: failure() \|\| cancelled()
	uses: actions/upload-artifact@v7
	with:
	name: benchmark-worker-logs
	path: benchmark-logs/
	if-no-files-found: ignore
	retention-days: 7

	detect-changes:
	runs-on: ubuntu-latest
	permissions:
	contents: read
	pull-requests: read
	outputs:
	common: ${{ steps.filter.outputs.common }}
	chat-completions: ${{ steps.filter.outputs.chat-completions }}
	completions: ${{ steps.filter.outputs.completions }}
	agentic: ${{ steps.filter.outputs.agentic }}
	embeddings: ${{ steps.filter.outputs.embeddings }}
	go-bindings: ${{ steps.filter.outputs.go-bindings }}
	rust-ci: ${{ steps.filter.outputs.rust-ci }}
	steps:
	- uses: actions/checkout@v6
	- uses: dorny/paths-filter@v4
	id: filter
	with:
	filters: \|
	common:
	- 'model_gateway/**'
	- 'crates/protocols/**'
	- 'bindings/**'
	- 'e2e_test/conftest.py'
	- 'e2e_test/infra/**'
	- 'e2e_test/fixtures/**'
	- 'Cargo.lock'
	- '.github/actions/**'
	- '.github/workflows/pr-test-rust.yml'
	- '.github/workflows/e2e-gpu-job.yml'
	- 'scripts/ci_setup_python_venv.sh'
	- 'scripts/ci_install_sglang.sh'
	- 'scripts/ci_install_vllm.sh'
	- 'scripts/ci_install_tokenspeed.sh'
	- 'scripts/ci_install_e2e_deps.sh'
	- 'scripts/ci_killall_sglang.sh'
	- 'scripts/ci_build_wheel.sh'
	- 'crates/tokenizer/**'
	- 'crates/tool_parser/**'
	chat-completions:
	- 'crates/reasoning_parser/**'
	- 'crates/multimodal/**'
	- 'crates/grpc_client/**'
	- 'grpc_servicer/**'
	- 'e2e_test/chat_completions/**'
	- 'e2e_test/router/**'
	- 'scripts/ci_install_vllm.sh'
	- 'scripts/ci_install_trtllm.sh'
	- 'scripts/ci_install_tokenspeed.sh'
	agentic:
	- 'crates/mcp/**'
	- 'crates/data_connector/**'
	- 'e2e_test/responses/**'
	- 'e2e_test/messages/**'
	- 'scripts/ci_agentic_svc_deps.sh'
	- 'scripts/oracle_flyway/**'
	completions:
	- 'crates/grpc_client/**'
	- 'grpc_servicer/**'
	- 'e2e_test/completions/**'
	embeddings:
	- 'e2e_test/embeddings/**'
	go-bindings:
	- 'e2e_test/bindings_go/**'
	rust-ci:
	- 'Cargo.toml'
	- 'Cargo.lock'
	- 'rustfmt.toml'
	- 'clippy.toml'
	- 'crates/**'
	- 'test_support/**'
	- 'model_gateway/**'
	- 'bindings/python/Cargo.toml'
	- 'bindings/python/build.rs'
	- 'bindings/python/src/**'
	- 'bindings/golang/Cargo.toml'
	- 'bindings/golang/src/**'
	- 'clients/rust/**'
	- 'clients/openapi-gen/**'
	- '.cargo/**'
	- '.github/actions/setup-rust/**'
	- 'scripts/ci_install_rust.sh'
	- '.github/workflows/pr-test-rust.yml'
	- 'examples/wasm/wasm-guest-storage-hook/**'
	- 'examples/wasm/wasm-guest-storage-hook-passthrough/**'

	# --- GPU E2E: organized by GPU tier + API, engine as matrix axis ---

	# === 1 GPU ===

	e2e-1gpu-chat:
	name: e2e-1gpu-chat (${{ matrix.engine }})
	needs: [build-wheel, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.build-wheel.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.chat-completions == 'true')))
	# Now also runs the previously-2-GPU chat tests (gpt-oss-20b,
	# Qwen2.5-14B) since they're tp=1.
	strategy:
	fail-fast: false
	matrix:
	include:
	- engine: sglang
	timeout: 36
	test_timeout: 28
	- engine: vllm
	timeout: 24
	test_timeout: 18
	- engine: trtllm
	timeout: 32
	test_timeout: 18
	# tokenspeed builds from source (~30m cold), so keep the job
	# timeout generous even though the test step is short.
	# Admin-ops e2e (flush/profile) piggybacks here because this is
	# the only lane with tokenspeed installed — e2e-1gpu-gateway
	# installs sglang/vllm only. The engine marker filter keeps the
	# rest of e2e_test/router out of this job.
	- engine: tokenspeed
	timeout: 50
	test_timeout: 18
	test_dirs: e2e_test/chat_completions e2e_test/router/test_admin_ops.py
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: ${{ matrix.engine }}
	gpu_tier: "1"
	runner: 1-gpu-h100
	timeout: ${{ matrix.timeout }}
	test_timeout: ${{ matrix.test_timeout }}
	test_dirs: ${{ matrix.test_dirs \|\| 'e2e_test/chat_completions' }}
	secrets: inherit

	e2e-1gpu-completions:
	name: e2e-1gpu-completions (${{ matrix.engine }})
	needs: [build-wheel, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.build-wheel.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.completions == 'true')))
	strategy:
	fail-fast: false
	matrix:
	include:
	- engine: sglang
	timeout: 20
	- engine: vllm
	timeout: 20
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: ${{ matrix.engine }}
	gpu_tier: "1"
	runner: 1-gpu-h100
	timeout: ${{ matrix.timeout }}
	test_dirs: e2e_test/completions
	secrets: inherit

	e2e-1gpu-embeddings:
	name: e2e-1gpu-embeddings (${{ matrix.engine }})
	needs: [build-wheel, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.build-wheel.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.embeddings == 'true')))
	strategy:
	fail-fast: false
	matrix:
	include:
	- engine: sglang
	timeout: 20
	- engine: vllm
	timeout: 20
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: ${{ matrix.engine }}
	gpu_tier: "1"
	runner: 1-gpu-h100
	timeout: ${{ matrix.timeout }}
	test_dirs: e2e_test/embeddings
	extra_deps: "sentence-transformers"
	secrets: inherit

	e2e-1gpu-gateway:
	name: e2e-1gpu-gateway (${{ matrix.engine }})
	needs: [build-wheel, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.build-wheel.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.chat-completions == 'true')))
	strategy:
	fail-fast: false
	matrix:
	include:
	- engine: sglang
	timeout: 20
	- engine: vllm
	timeout: 20
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: ${{ matrix.engine }}
	gpu_tier: "1"
	runner: 1-gpu-h100
	timeout: ${{ matrix.timeout }}
	test_dirs: e2e_test/router
	test_filter: "--ignore=e2e_test/router/test_pd_mmlu.py"
	secrets: inherit

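  # === 2 GPU ===

  # e2e-2gpu-chat was retired: all chat_completions tests are tp=1 now
  # and run under e2e-1gpu-chat above. Note that e2e-1gpu-responses, the job
  # directly below, is a 1-GPU job; the 2-GPU lane in this section is
  # e2e-2gpu-pd.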

	e2e-1gpu-responses:
	needs: [build-wheel, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.build-wheel.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.agentic == 'true')))
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: sglang
	gpu_tier: "1"
	runner: 1-gpu-h100
	timeout: 28
	test_timeout: 20
	test_dirs: e2e_test/responses
	setup_agentic_deps: true
	secrets: inherit

	e2e-2gpu-pd:
	name: e2e-2gpu-pd (${{ matrix.engine }}${{ matrix.kv_backend && format('-{0}', matrix.kv_backend) \|\| '' }})
	needs: [e2e-1gpu-gateway, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.e2e-1gpu-gateway.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.chat-completions == 'true')))
	strategy:
	fail-fast: false
	matrix:
	include:
	- engine: sglang
	timeout: 30
	- engine: vllm
	timeout: 30
	- engine: vllm
	kv_backend: mooncake
	timeout: 30
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: ${{ matrix.engine }}
	gpu_tier: "2"
	runner: 2-gpu-h100
	timeout: ${{ matrix.timeout }}
	test_dirs: e2e_test/router
	vllm_kv_backend: ${{ matrix.kv_backend \|\| 'nixl' }}
	secrets: inherit

	# === 4 GPU ===

	e2e-4gpu-chat:
	name: e2e-4gpu-chat (${{ matrix.engine }})
	needs: [e2e-1gpu-chat]
	strategy:
	fail-fast: false
	matrix:
	include:
	- engine: sglang
	timeout: 30
	- engine: vllm
	timeout: 30
	- engine: trtllm
	timeout: 45
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: ${{ matrix.engine }}
	gpu_tier: "4"
	runner: 4-gpu-h100
	timeout: ${{ matrix.timeout }}
	test_dirs: e2e_test/chat_completions
	secrets: inherit

	e2e-4gpu-gateway:
	name: e2e-4gpu-gateway
	needs: [e2e-1gpu-gateway]
	uses: ./.github/workflows/e2e-gpu-job.yml
	with:
	engine: sglang
	gpu_tier: "4"
	runner: 4-gpu-h100
	timeout: 25
	test_dirs: e2e_test/router
	test_filter: "-k TestIGWMixedWorkerClassification"
	secrets: inherit

	# --- Vendor E2E: CPU-only cloud backend tests ---

	e2e-vendor:
	name: ${{ matrix.name }}
	needs: [build-wheel, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.build-wheel.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.agentic == 'true')))
	permissions:
	contents: read
	strategy:
	fail-fast: false
	matrix:
	include:
	- name: anthropic-messages
	vendor: anthropic
	test_path: e2e_test/messages
	timeout: 20
	setup_agentic_deps: true

	- name: openai-responses
	vendor: openai
	test_path: e2e_test/responses
	timeout: 30
	setup_agentic_deps: true

	- name: openai-realtime
	vendor: openai
	test_path: e2e_test/realtime
	timeout: 20

	- name: xai-responses
	vendor: xai
	test_path: e2e_test/responses
	timeout: 15
	setup_agentic_deps: true

	runs-on: k8s-runner-cpu
	timeout-minutes: ${{ matrix.timeout }}
	env:
	E2E_VENDOR: ${{ matrix.vendor }}
	E2E_GPU_TIER: "0"
	SHOW_ROUTER_LOGS: "1"
	steps:
	- name: Checkout code
	uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.13"

	- name: Check shared services
	if: matrix.setup_agentic_deps
	run: bash scripts/ci_agentic_svc_deps.sh check --oracle oracle-db --brave brave-search-mcp

	- name: Download wheel artifact
	uses: actions/download-artifact@v8
	with:
	name: smg-wheel
	path: wheel/

	- name: Download WASM test fixtures
	uses: actions/download-artifact@v8
	with:
	name: wasm-test-fixtures
	path: crates/wasm/tests/fixtures/
	continue-on-error: true

	- name: Download Python client types
	uses: actions/download-artifact@v8
	with:
	name: python-client-types
	path: clients/python/smg_client/types/

	- name: Install wheel and test dependencies
	run: \|
	pip uninstall -y smg \|\| true
	pip install wheel/*.whl
	bash scripts/ci_install_e2e_deps.sh

	- name: Setup Oracle
	if: matrix.setup_agentic_deps
	run: \|
	bash scripts/ci_agentic_svc_deps.sh setup-oracle-client
	bash scripts/ci_agentic_svc_deps.sh create-oracle-user oracle-db
	bash scripts/ci_agentic_svc_deps.sh create-oracle-flyway-user oracle-db

	- name: Run E2E tests
	env:
	BRAVE_MCP_HOST: ${{ matrix.setup_agentic_deps && 'brave-search-mcp' \|\| '' }}
	run: \|
	ROUTER_LOCAL_MODEL_PATH="/models" pytest ${{ matrix.test_path }} \
	-m "not external" \
	--reruns 2 --reruns-delay 5 \
	-s -vv

	- name: Cleanup Oracle test users
	if: always() && matrix.setup_agentic_deps
	run: \|
	bash scripts/ci_agentic_svc_deps.sh cleanup-oracle-flyway-user oracle-db
	bash scripts/ci_agentic_svc_deps.sh cleanup-oracle-user oracle-db

	go-unit-tests:
	name: go-unit-tests
	needs: build-wheel
	runs-on: k8s-runner-cpu
	timeout-minutes: 15
	permissions:
	contents: read
	steps:
	- name: Checkout code
	uses: actions/checkout@v6

	- name: Setup Go
	uses: actions/setup-go@v6
	with:
	go-version: '1.24'
	cache: true
	cache-dependency-path: bindings/golang/go.sum

	- name: Install build tools
	run: \|
	sudo apt-get update
	sudo apt-get install -y build-essential

	- name: Download Go FFI library
	uses: actions/download-artifact@v8
	with:
	name: go-ffi-library
	path: bindings/golang/target/release/

	- name: Verify Go FFI library
	run: ls -la bindings/golang/target/release/libsmg_go.*

	- name: Run Go unit tests
	run: \|
	cd bindings/golang
	export CGO_ENABLED=1
	export CGO_LDFLAGS="-L$(pwd)/target/release"
	export LD_LIBRARY_PATH="$(pwd)/target/release:$LD_LIBRARY_PATH"
	go test -v ./...

	go-bindings-e2e:
	name: go-bindings-e2e
	needs: [build-wheel, detect-changes]
	if: >-
	always()
	&& !cancelled()
	&& needs.build-wheel.result == 'success'
	&& (github.event_name != 'pull_request'
	\|\| (needs.detect-changes.result == 'success'
	&& (needs.detect-changes.outputs.common == 'true'
	\|\| needs.detect-changes.outputs.go-bindings == 'true')))
	runs-on: 1-gpu-h100
	timeout-minutes: 20
	permissions:
	contents: read
	steps:
	- name: Checkout code
	uses: actions/checkout@v6

	- name: Setup SGLang backend
	uses: ./.github/actions/setup-sglang

	- name: Setup Go
	uses: actions/setup-go@v6
	with:
	go-version: '1.24'
	cache: true
	cache-dependency-path: bindings/golang/go.sum

	- name: Download Go FFI library
	uses: actions/download-artifact@v8
	with:
	name: go-ffi-library
	path: bindings/golang/target/release/

	- name: Verify Go FFI library
	run: ls -la bindings/golang/target/release/libsmg_go.*

	- name: Download wheel artifact
	uses: actions/download-artifact@v8
	with:
	name: smg-wheel
	path: wheel/

	- name: Download Python client types
	uses: actions/download-artifact@v8
	with:
	name: python-client-types
	path: clients/python/smg_client/types/

	- name: Install wheel and test dependencies
	run: \|
	pip uninstall -y smg \|\| true
	pip install wheel/*.whl
	bash scripts/ci_install_e2e_deps.sh

	- name: Run Go OAI server E2E tests
	run: \|
	bash scripts/ci_killall_sglang.sh "nuke_gpus"
	export CGO_LDFLAGS="-L$(pwd)/bindings/golang/target/release"
	export LD_LIBRARY_PATH="$(pwd)/bindings/golang/target/release:$LD_LIBRARY_PATH"
	SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/models" \
	E2E_LOG_DIR=e2e-logs \
	pytest --reruns 2 --reruns-delay 5 e2e_test/bindings_go -s -vv

	- name: Worker failure diagnostics
	if: failure() \|\| cancelled()
	run: bash scripts/ci_dump_worker_logs.sh e2e-logs "e2e-worker-logs-go-bindings"

	- name: Upload worker logs
	if: failure() \|\| cancelled()
	uses: actions/upload-artifact@v7
	with:
	name: e2e-worker-logs-go-bindings
	path: e2e-logs/
	retention-days: 7
	if-no-files-found: ignore

	go-bindings-benchmark:
	name: go-bindings-benchmark
	needs: build-wheel
	if: false # Disabled
	runs-on: k8s-runner-gpu
	timeout-minutes: 32
	permissions:
	contents: read
	steps:
	- name: Checkout code
	uses: actions/checkout@v6

	- name: Setup SGLang backend
	uses: ./.github/actions/setup-sglang

	- name: Setup Go
	uses: actions/setup-go@v6
	with:
	go-version: '1.24'
	cache: true
	cache-dependency-path: bindings/golang/go.sum

	- name: Download Go FFI library
	uses: actions/download-artifact@v8
	with:
	name: go-ffi-library
	path: bindings/golang/target/release/

	- name: Verify Go FFI library
	run: ls -la bindings/golang/target/release/libsmg_go.*

	- name: Download wheel artifact
	uses: actions/download-artifact@v8
	with:
	name: smg-wheel
	path: wheel/

	- name: Install wheel
	run: \|
	pip uninstall -y smg \|\| true
	pip install wheel/*.whl

	- name: Install test dependencies
	run: \|
	python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy

	- name: Pull genai-bench image
	run: docker pull ${{ env.GENAI_BENCH_IMAGE }}

	- name: Run Go bindings benchmark
	run: \|
	bash scripts/ci_killall_sglang.sh "nuke_gpus"
	export CGO_LDFLAGS="-L$(pwd)/bindings/golang/target/release"
	export LD_LIBRARY_PATH="$(pwd)/bindings/golang/target/release:$LD_LIBRARY_PATH"
	SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/models" \
	pytest e2e_test/benchmarks/test_go_bindings_perf.py -s -vv

	- name: Upload benchmark results
	if: always()
	uses: actions/upload-artifact@v7
	with:
	name: genai-bench-results-go-bindings
	path: benchmark_go_bindings/

	finish:
	needs: [pre-commit, python-lint, grpc-proto-build-check, build-wheel, python-unit-tests, unit-tests, benchmarks, e2e-1gpu-chat, e2e-1gpu-completions, e2e-1gpu-embeddings, e2e-1gpu-gateway, e2e-1gpu-responses, e2e-2gpu-pd, e2e-4gpu-chat, e2e-4gpu-gateway, e2e-vendor, go-unit-tests, go-bindings-e2e]
	if: always()
	runs-on: k8s-runner-cpu
	permissions: {}
	steps:
	- name: Check CI result
	run: \|
	if [[ "${{ needs.pre-commit.result }}" == "failure" \|\| \
	"${{ needs.python-lint.result }}" == "failure" \|\| \
	"${{ needs.grpc-proto-build-check.result }}" == "failure" \|\| \
	"${{ needs.build-wheel.result }}" == "failure" \|\| \
	"${{ needs.python-unit-tests.result }}" == "failure" \|\| \
	"${{ needs.unit-tests.result }}" == "failure" \|\| \
	"${{ needs.benchmarks.result }}" == "failure" \|\| \
	"${{ needs.e2e-1gpu-chat.result }}" == "failure" \|\| \
	"${{ needs.e2e-1gpu-completions.result }}" == "failure" \|\| \
	"${{ needs.e2e-1gpu-embeddings.result }}" == "failure" \|\| \
	"${{ needs.e2e-1gpu-gateway.result }}" == "failure" \|\| \
	"${{ needs.e2e-1gpu-responses.result }}" == "failure" \|\| \
	"${{ needs.e2e-2gpu-pd.result }}" == "failure" \|\| \
	"${{ needs.e2e-4gpu-chat.result }}" == "failure" \|\| \
	"${{ needs.e2e-4gpu-gateway.result }}" == "failure" \|\| \
	"${{ needs.e2e-vendor.result }}" == "failure" \|\| \
	"${{ needs.go-unit-tests.result }}" == "failure" \|\| \
	"${{ needs.go-bindings-e2e.result }}" == "failure" ]]; then
	echo "One or more jobs failed"
	exit 1
	else
	echo "All jobs completed successfully"
	fi

	summarize-benchmarks:
	needs: [benchmarks]
	runs-on: k8s-runner-cpu
	if: success()
	permissions:
	contents: read
	steps:
	- name: Checkout code
	uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v6
	with:
	python-version: "3.13"

	- name: Download gateway benchmark results
	uses: actions/download-artifact@v8
	with:
	name: genai-bench-results-all-policies

	- name: Create benchmark summary
	run: python3 e2e_test/benchmarks/summarize.py .

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix(grpc-proto): lazy-load generated re-exports #7448

Workflow file

fix(grpc-proto): lazy-load generated re-exports #7448

Uh oh!

Workflow file for this run