feat: support session aware routing #6761

Workflow file for this run

.github/workflows/test-and-build.yml at b8b91c8

	name: Test And Build

	# Triggers: pull requests and pushes targeting main, a nightly schedule,
	# and manual dispatch.
	on:
	  pull_request:
	    # ready_for_review is listed so a PR leaving draft state triggers a run.
	    types: [opened, synchronize, reopened, ready_for_review]
	    branches:
	      - main
	  push:
	    branches:
	      - main
	  schedule:
	    # Run nightly at 2:00 AM UTC
	    - cron: "0 2 * * *"
	  workflow_dispatch: # Allow manual triggering

	# Runs are grouped per workflow and ref; a newer run in the same group
	# cancels the one still in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true

	# Workflow-level environment; HELM_VERSION is read through env.HELM_VERSION.
	env:
	  HELM_VERSION: v3.14.0

	jobs:
	  # Reusable workflow whose outputs (core, helm, e2e, docker, make, ci) are
	  # used below as a filter.
	  changes:
	    uses: ./.github/workflows/ci-changes.yml

	  test-and-build:
	    needs: changes
	    # Skipped for draft pull requests. Otherwise runs on the schedule event,
	    # or when any of the listed `changes` outputs equals 'true'.
	    if: >-
	      ${{ !github.event.pull_request.draft
	      && (github.event_name == 'schedule'
	      || needs.changes.outputs.core == 'true'
	      || needs.changes.outputs.helm == 'true'
	      || needs.changes.outputs.e2e == 'true'
	      || needs.changes.outputs.docker == 'true'
	      || needs.changes.outputs.make == 'true'
	      || needs.changes.outputs.ci == 'true') }}
	    runs-on: ubuntu-latest
	    steps:
	      - name: Check out the repo
	        uses: actions/checkout@v4

	      - name: Set up Rust
	        uses: dtolnay/rust-toolchain@stable
	        with:
	          # Quoted so YAML does not read the version as the float 1.9.
	          toolchain: "1.90"

	      - name: Set up Go
	        uses: actions/setup-go@v5
	        with:
	          go-version: "1.24"

	      - name: Install system dependencies
	        run: |
	          sudo apt-get update
	          sudo apt-get install -y \
	            make \
	            build-essential \
	            pkg-config

	      - name: Set up Helm
	        uses: azure/setup-helm@v4
	        with:
	          version: ${{ env.HELM_VERSION }}

	      - name: Validate Helm chart
	        run: make helm-ci-validate HELM_NAMESPACE=test-namespace

	      - name: Set up golangci-lint
	        uses: golangci/golangci-lint-action@v7
	        with:
	          version: v2.5.0
	          install-mode: binary
	          # NOTE(review): presumably --help is passed so this step only
	          # installs the binary without linting; confirm.
	          args: --help

	      - name: Cache Rust dependencies
	        uses: actions/cache@v4
	        with:
	          path: |
	            ~/.cargo/bin/
	            ~/.cargo/registry/index/
	            ~/.cargo/registry/cache/
	            ~/.cargo/git/db/
	            candle-binding/target/
	          # Recursive globs so manifests in subdirectories feed the key.
	          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }}
	          restore-keys: |
	            ${{ runner.os }}-cargo-

	      - name: Cache Go dependencies
	        uses: actions/cache@v4
	        with:
	          path: |
	            ~/go/pkg/mod
	          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
	          restore-keys: |
	            ${{ runner.os }}-go-

	      - name: Setup model storage on /mnt
	        run: |
	          # Use /mnt for model storage (has ~75GB vs ~14GB on root)
	          # This helps prevent "no space left on device" errors
	          echo "Disk space before setup:"
	          df -h / && df -h /mnt

	          # Create /mnt/models directory if it doesn't exist
	          sudo mkdir -p /mnt/models
	          sudo chown -R $USER:$USER /mnt/models

	          # If models directory already exists in workspace, move it to /mnt
	          if [ -d "models" ] && [ ! -L "models" ]; then
	            echo "Moving existing models directory to /mnt/models..."
	            # Move contents if /mnt/models is not empty, otherwise just move the directory
	            if [ "$(ls -A /mnt/models 2>/dev/null)" ]; then
	              echo "Warning: /mnt/models already has content, merging..."
	              sudo cp -r models/* /mnt/models/ || true
	              rm -rf models
	            else
	              # NOTE(review): /mnt/models was created above, so this mv places
	              # the directory at /mnt/models/models; confirm that is intended.
	              sudo mv models /mnt/models
	            fi
	          fi

	          # Create symlink from models/ to /mnt/models/ so existing code continues to work
	          if [ ! -e "models" ]; then
	            ln -s /mnt/models models
	            echo "Created symlink: models -> /mnt/models"
	          elif [ -L "models" ]; then
	            echo "Symlink already exists: models -> $(readlink models)"
	          else
	            echo "Warning: models exists but is not a symlink"
	          fi

	          echo "Disk space after setup:"
	          df -h / && df -h /mnt
	          echo "Models directory setup complete. Models will be stored in /mnt/models"

	      - name: Cache Models
	        uses: actions/cache@v4
	        with:
	          path: |
	            models/
	          key: ${{ runner.os }}-models-v2-${{ hashFiles('tools/make/models.mk') }}
	          restore-keys: |
	            ${{ runner.os }}-models-v2-
	        continue-on-error: true # Don't fail the job if caching fails

	      - name: Check go mod tidy
	        run: make check-go-mod-tidy

	      - name: Build Rust library (CPU-only, no CUDA)
	        run: make rust-ci

	      - name: Install HuggingFace CLI
	        run: |
	          pip install -U "huggingface_hub[cli]" hf_transfer

	      - name: Start Milvus service
	        run: |
	          echo "Starting Milvus vector database..."

	          # Pre-pull with retries to handle Docker Hub rate limits
	          MILVUS_IMAGE="milvusdb/milvus:v2.3.3"
	          for attempt in 1 2 3 4 5; do
	            if docker pull "${MILVUS_IMAGE}"; then
	              echo "Successfully pulled ${MILVUS_IMAGE}"
	              break
	            fi
	            if [ "$attempt" -eq 5 ]; then
	              echo "ERROR: Failed to pull ${MILVUS_IMAGE} after 5 attempts"
	              exit 1
	            fi
	            echo "Pull attempt ${attempt} failed, retrying in $((attempt * 15))s..."
	            sleep $((attempt * 15))
	          done

	          docker run -d \
	            --name milvus-semantic-cache \
	            --security-opt seccomp:unconfined \
	            -e ETCD_USE_EMBED=true \
	            -e ETCD_DATA_DIR=/var/lib/milvus/etcd \
	            -e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
	            -e COMMON_STORAGETYPE=local \
	            -e CLUSTER_ENABLED=false \
	            -p 19530:19530 \
	            -p 9091:9091 \
	            "${MILVUS_IMAGE}" \
	            milvus run standalone

	          echo "Waiting for Milvus to be ready..."
	          sleep 20

	          # Verify Milvus is responsive
	          # NOTE(review): the trailing "|| true" swallows a timeout, so the
	          # "ready" message below prints even if the log line never appeared.
	          timeout 30 bash -c 'until docker logs milvus-semantic-cache 2>&1 | grep -q "Proxy successfully started"; do sleep 2; done' || true

	          echo "Milvus is ready at localhost:19530"
	          docker ps --filter "name=milvus-semantic-cache"

	      - name: Start Redis service
	        run: |
	          echo "Starting Redis Stack..."
	          make start-redis

	      - name: Start Valkey service
	        run: |
	          echo "Starting Valkey bundle..."
	          make start-valkey

	      - name: Run semantic router tests
	        run: make test
	        env:
	          CI: true
	          # Evaluates to true only for pull_request events.
	          CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
	          CGO_ENABLED: 1
	          LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release
	          MILVUS_URI: localhost:19530
	          SKIP_MILVUS_TESTS: false
	          SKIP_REDIS_TESTS: false
	          SKIP_VALKEY_TESTS: false
	          VALKEY_HOST: localhost
	          VALKEY_PORT: 6380
	          # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
	          # For PRs from forks, this will be empty and gated models will gracefully skip
	          HF_TOKEN: ${{ secrets.HF_TOKEN }}
	          HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}

	      # The three cleanup steps use always() so they run even after a failure.
	      - name: Clean Redis service
	        if: always()
	        run: |
	          echo "Stopping Redis container and cleaning data..."
	          make clean-redis

	      - name: Clean Valkey service
	        if: always()
	        run: |
	          echo "Stopping Valkey container and cleaning data..."
	          make clean-valkey

	      - name: Stop Milvus service
	        if: always()
	        run: |
	          echo "Stopping Milvus container..."
	          docker stop milvus-semantic-cache || true
	          docker rm milvus-semantic-cache || true
	          echo "Milvus container cleaned up"

	      - name: Upload test artifacts on failure
	        if: failure()
	        uses: actions/upload-artifact@v4
	        with:
	          name: test-logs
	          path: |
	            **/*.log
	            **/test-output.*
	            dist/helm/*.yaml
	          retention-days: 7

	      - name: Notify on failure
	        if: failure()
	        run: |
	          echo "::error::Test and build failed. Check the workflow run for details."
	          echo "To reproduce this job locally: make test-and-build-local"
	          echo "To run the baseline PR parity gate locally: make agent-pr-gate"

	  # Trigger Docker publishing on successful nightly runs
	  publish-docker:
	    needs: test-and-build
	    if: github.repository == 'vllm-project/semantic-router' && success() && github.event_name == 'schedule'
	    uses: ./.github/workflows/docker-publish.yml
	    with:
	      # NOTE(review): YAML does not evaluate $(date ...); the literal string
	      # is passed to the called workflow. Confirm it expands the value there.
	      tag_suffix: nightly-$(date +'%Y%m%d')
	      is_nightly: true
	    secrets: inherit

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: support session aware routing #6761

Workflow file

feat: support session aware routing #6761

Uh oh!

Workflow file for this run