Skip to content

feat: support session aware routing #6761

feat: support session aware routing

feat: support session aware routing #6761

name: Test And Build

# Events that start this workflow: pull requests targeting main, pushes to
# main, a nightly schedule, and manual dispatch.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
    branches: [main]
  push:
    branches: [main]
  schedule:
    # Run nightly at 2:00 AM UTC
    - cron: '0 2 * * *'
  # Allow manual triggering
  workflow_dispatch:
# Runs are grouped by workflow name and git ref; starting a new run in a
# group cancels the one already in progress.
# NOTE(review): scheduled runs and pushes to main appear to share one group,
# so a push could cancel an in-progress nightly run - confirm this is intended.
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true

# Workflow-wide environment; HELM_VERSION is read by the "Set up Helm" step.
env:
  HELM_VERSION: "v3.14.0"
jobs:
  # Reusable workflow used as a filter: test-and-build reads its outputs
  # (core, helm, e2e, docker, make, ci) to decide whether to run.
  changes:
    uses: ./.github/workflows/ci-changes.yml
test-and-build:
  needs: changes
  # Never run for draft pull requests. Otherwise run on the nightly schedule,
  # or whenever the `changes` job reports 'true' for at least one of its
  # outputs (core, helm, e2e, docker, make, ci).
  # NOTE(review): only the 'schedule' event bypasses the change filter; a
  # manual workflow_dispatch run still depends on the `changes` outputs -
  # confirm that is intended.
  # Keep every line of the folded scalar at the same indentation so it folds
  # into a single-line expression.
  if: >-
    ${{ !github.event.pull_request.draft &&
    (github.event_name == 'schedule' ||
    needs.changes.outputs.core == 'true' ||
    needs.changes.outputs.helm == 'true' ||
    needs.changes.outputs.e2e == 'true' ||
    needs.changes.outputs.docker == 'true' ||
    needs.changes.outputs.make == 'true' ||
    needs.changes.outputs.ci == 'true') }}
  runs-on: ubuntu-latest
  steps:
# Fetch the repository contents into the runner workspace.
- name: Check out the repo
  uses: actions/checkout@v4
# Install the pinned Rust toolchain.
- name: Set up Rust
  uses: dtolnay/rust-toolchain@stable
  with:
    # Must stay quoted: an unquoted 1.90 is a YAML float, so the trailing
    # zero is dropped and the value is read as 1.9 instead of 1.90.
    toolchain: "1.90"
# Install the Go toolchain.
- name: Set up Go
  uses: actions/setup-go@v5
  with:
    # Quoted so the version is passed as a string rather than a YAML number.
    go-version: '1.24'
# Native build prerequisites installed through apt.
- name: Install system dependencies
  run: |
    sudo apt-get update
    sudo apt-get install -y make build-essential pkg-config
# Install the Helm release pinned in the workflow-level env block.
- name: Set up Helm
  uses: azure/setup-helm@v4
  with:
    version: "${{ env.HELM_VERSION }}"

# Run the repository's Helm validation make target against a test namespace.
- name: Validate Helm chart
  run: make helm-ci-validate HELM_NAMESPACE=test-namespace
# Install a pinned golangci-lint binary.
# NOTE(review): `args: --help` presumably keeps this step from running a full
# lint pass so that it only provisions the tool - confirm against the action.
- name: Set up golangci-lint
  uses: golangci/golangci-lint-action@v7
  with:
    install-mode: binary
    version: "v2.5.0"
    args: "--help"
# Cache Cargo downloads and the candle-binding build output.
- name: Cache Rust dependencies
  uses: actions/cache@v4
  with:
    # The exact key changes whenever any Cargo.lock or Cargo.toml changes;
    # the restore key falls back to the most recent cache for this OS.
    key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }}
    restore-keys: |
      ${{ runner.os }}-cargo-
    path: |
      ~/.cargo/bin/
      ~/.cargo/registry/index/
      ~/.cargo/registry/cache/
      ~/.cargo/git/db/
      candle-binding/target/
# Cache the Go module download directory.
- name: Cache Go dependencies
  uses: actions/cache@v4
  with:
    # The exact key follows the go.sum files; the restore key falls back to
    # the most recent Go cache for this OS.
    key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
    restore-keys: |
      ${{ runner.os }}-go-
    path: "~/go/pkg/mod"
# Relocate the workspace `models` directory to /mnt/models and leave a
# `models` symlink behind so existing paths keep working.
- name: Setup model storage on /mnt
  run: |
    # Use /mnt for model storage (has ~75GB vs ~14GB on root)
    # This helps prevent "no space left on device" errors
    echo "Disk space before setup:"
    df -h / && df -h /mnt

    # Create /mnt/models if it doesn't exist and hand it to the current user
    # so the copy below and later steps can write to it without sudo.
    sudo mkdir -p /mnt/models
    sudo chown -R "$USER:$USER" /mnt/models

    # If a real (non-symlink) models directory exists in the workspace,
    # move its contents to /mnt/models.
    if [ -d "models" ] && [ ! -L "models" ]; then
      echo "Moving existing models directory to /mnt/models..."
      if [ "$(ls -A /mnt/models 2>/dev/null)" ]; then
        echo "Warning: /mnt/models already has content, merging..."
      fi
      # Copy the directory *contents* (dotfiles included). /mnt/models always
      # exists at this point, so `mv models /mnt/models` would nest the data
      # at /mnt/models/models instead of replacing the directory.
      # Only delete the workspace copy once the copy has succeeded.
      if cp -a models/. /mnt/models/; then
        rm -rf models
      else
        echo "Warning: could not copy models into /mnt/models; keeping the workspace copy"
      fi
    fi

    # Create symlink from models/ to /mnt/models/ so existing code continues to work
    if [ ! -e "models" ]; then
      ln -s /mnt/models models
      echo "Created symlink: models -> /mnt/models"
    elif [ -L "models" ]; then
      echo "Symlink already exists: models -> $(readlink models)"
    else
      echo "Warning: models exists but is not a symlink"
    fi

    echo "Disk space after setup:"
    df -h / && df -h /mnt
    echo "Models directory setup complete. Models will be stored in /mnt/models"
# Cache the models directory between runs.
- name: Cache Models
  uses: actions/cache@v4
  # Don't fail the job if caching fails
  continue-on-error: true
  with:
    # The exact key follows tools/make/models.mk; the restore key falls back
    # to the most recent v2 models cache for this OS.
    key: ${{ runner.os }}-models-v2-${{ hashFiles('tools/make/models.mk') }}
    restore-keys: |
      ${{ runner.os }}-models-v2-
    path: |
      models/
# Run the repository's go.mod tidiness check.
- name: Check go mod tidy
  run: make check-go-mod-tidy

# Build the Rust library through the CI make target.
- name: "Build Rust library (CPU-only, no CUDA)"
  run: make rust-ci

# Install or upgrade the Hugging Face CLI and the hf_transfer package.
- name: Install HuggingFace CLI
  run: pip install -U "huggingface_hub[cli]" hf_transfer
# Start a standalone Milvus container for the tests that use MILVUS_URI.
- name: Start Milvus service
  run: |
    echo "Starting Milvus vector database..."

    # Pre-pull with retries to handle Docker Hub rate limits.
    # The wait grows linearly (15s, 30s, 45s, 60s); the fifth failure aborts.
    MILVUS_IMAGE="milvusdb/milvus:v2.3.3"
    for attempt in 1 2 3 4 5; do
      if docker pull "${MILVUS_IMAGE}"; then
        echo "Successfully pulled ${MILVUS_IMAGE}"
        break
      fi
      if [ "$attempt" -eq 5 ]; then
        echo "ERROR: Failed to pull ${MILVUS_IMAGE} after 5 attempts"
        exit 1
      fi
      echo "Pull attempt ${attempt} failed, retrying in $((attempt * 15))s..."
      sleep $((attempt * 15))
    done

    # Standalone mode with embedded etcd and local storage, exposing
    # ports 19530 and 9091 on the runner.
    docker run -d \
      --name milvus-semantic-cache \
      --security-opt seccomp:unconfined \
      -e ETCD_USE_EMBED=true \
      -e ETCD_DATA_DIR=/var/lib/milvus/etcd \
      -e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
      -e COMMON_STORAGETYPE=local \
      -e CLUSTER_ENABLED=false \
      -p 19530:19530 \
      -p 9091:9091 \
      "${MILVUS_IMAGE}" \
      milvus run standalone

    echo "Waiting for Milvus to be ready..."
    sleep 20

    # Poll the container log for up to 30s. A missed readiness line stays
    # non-fatal (best effort), but it is now reported as a warning with a log
    # tail instead of being hidden behind an unconditional "ready" message.
    if timeout 30 bash -c 'until docker logs milvus-semantic-cache 2>&1 | grep -q "Proxy successfully started"; do sleep 2; done'; then
      echo "Milvus is ready at localhost:19530"
    else
      echo "::warning::Milvus readiness was not confirmed within 30s; continuing anyway"
      docker logs --tail 50 milvus-semantic-cache 2>&1 || true
    fi
    docker ps --filter "name=milvus-semantic-cache"
# Start Redis through the repository's make target.
- name: Start Redis service
  run: |
    echo "Starting Redis Stack..."
    make start-redis

# Start Valkey through the repository's make target.
- name: Start Valkey service
  run: |
    echo "Starting Valkey bundle..."
    make start-valkey
# Run the test suite with the backing services started by earlier steps.
# Literal values are quoted because environment variables are strings.
- name: Run semantic router tests
  env:
    CI: "true"
    # 'true' only when the workflow was triggered by a pull request.
    CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
    CGO_ENABLED: "1"
    LD_LIBRARY_PATH: "${{ github.workspace }}/candle-binding/target/release"
    MILVUS_URI: "localhost:19530"
    SKIP_MILVUS_TESTS: "false"
    SKIP_REDIS_TESTS: "false"
    SKIP_VALKEY_TESTS: "false"
    VALKEY_HOST: "localhost"
    VALKEY_PORT: "6380"
    # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
    # For PRs from forks, this will be empty and gated models will gracefully skip
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
    HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}
  run: make test
# The three teardown steps below use `if: always()` so they run even when an
# earlier step failed.
- name: Clean Redis service
  if: always()
  run: |
    echo "Stopping Redis container and cleaning data..."
    make clean-redis

- name: Clean Valkey service
  if: always()
  run: |
    echo "Stopping Valkey container and cleaning data..."
    make clean-valkey

# `|| true` keeps teardown from failing when the container is already gone.
- name: Stop Milvus service
  if: always()
  run: |
    echo "Stopping Milvus container..."
    docker stop milvus-semantic-cache || true
    docker rm milvus-semantic-cache || true
    echo "Milvus container cleaned up"
# Only on failure: upload log files, test output and rendered Helm YAML as a
# "test-logs" artifact, kept for 7 days.
- name: Upload test artifacts on failure
  if: failure()
  uses: actions/upload-artifact@v4
  with:
    name: test-logs
    retention-days: 7
    path: |
      **/*.log
      **/test-output.*
      dist/helm/*.yaml
# Only on failure: emit an error annotation and print local reproduction hints.
- name: Notify on failure
  if: failure()
  run: |
    printf '%s\n' \
      "::error::Test and build failed. Check the workflow run for details." \
      "To reproduce this job locally: make test-and-build-local" \
      "To run the baseline PR parity gate locally: make agent-pr-gate"
# Trigger Docker publishing on successful nightly runs.
# Runs only in the vllm-project/semantic-router repository, only for the
# 'schedule' event, and only after test-and-build succeeded.
publish-docker:
  needs: test-and-build
  if: >-
    github.repository == 'vllm-project/semantic-router'
    && success()
    && github.event_name == 'schedule'
  uses: ./.github/workflows/docker-publish.yml
  secrets: inherit
  with:
    # NOTE(review): `with:` values are not passed through a shell here, so
    # the called workflow receives the literal text `$(date +'%Y%m%d')`.
    # It only becomes a date if docker-publish.yml expands this input inside
    # a shell script - confirm against that workflow.
    tag_suffix: "nightly-$(date +'%Y%m%d')"
    is_nightly: true