refactor(milvus): share lifecycle across stores #4441
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Kubernetes end-to-end integration tests.
# Triggers: pull requests targeting main, pushes to main, and manual dispatch.
# Changes confined to website/** do not trigger the PR/push events.
name: Integration Test [Kubernetes]

on:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches:
      - main
    paths-ignore:
      - 'website/**'
  push:
    branches:
      - main
    paths-ignore:
      - 'website/**'
  workflow_dispatch:  # Allow manual triggering

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Detect which files changed to determine which profiles to test
  changes:
    uses: ./.github/workflows/ci-changes.yml

  # Determine which profiles need to be tested based on file changes.
  # Outputs:
  #   profiles   - JSON array of profile names consumed by the test matrix
  #   should_run - 'true' when at least one profile must be tested
  determine-profiles:
    needs: changes
    runs-on: ubuntu-latest
    outputs:
      profiles: ${{ steps.set-matrix.outputs.profiles }}
      should_run: ${{ steps.set-matrix.outputs.should_run }}
    steps:
      - id: set-matrix
        run: |
          # Run the default baseline profiles when common e2e code, core code,
          # docker, make, CI or agent-exec files changed, or on a
          # scheduled/manual trigger.
          # NOTE(review): the `on:` block of this workflow declares no
          # `schedule` trigger and `push` is not checked here, although the
          # log message below mentions both - confirm the intended behavior.
          if [[ "${{ needs.changes.outputs.e2e_common }}" == "true" ]] || \
             [[ "${{ needs.changes.outputs.core }}" == "true" ]] || \
             [[ "${{ needs.changes.outputs.docker }}" == "true" ]] || \
             [[ "${{ needs.changes.outputs.make }}" == "true" ]] || \
             [[ "${{ needs.changes.outputs.ci }}" == "true" ]] || \
             [[ "${{ needs.changes.outputs.agent_exec }}" == "true" ]] || \
             [[ "${{ github.event_name }}" == "schedule" ]] || \
             [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            echo 'profiles=["kubernetes", "dashboard"]' >> "$GITHUB_OUTPUT"
            echo 'should_run=true' >> "$GITHUB_OUTPUT"
            echo "Running default baseline profiles due to common/core changes or push/schedule/manual trigger"
            exit 0
          fi

          # Otherwise only run the profiles whose own files changed
          profiles=()
          [[ "${{ needs.changes.outputs.e2e_istio }}" == "true" ]] && profiles+=("istio")
          [[ "${{ needs.changes.outputs.e2e_kubernetes }}" == "true" ]] && profiles+=("kubernetes")
          [[ "${{ needs.changes.outputs.e2e_aibrix }}" == "true" ]] && profiles+=("aibrix")
          [[ "${{ needs.changes.outputs.e2e_dashboard }}" == "true" ]] && profiles+=("dashboard")
          [[ "${{ needs.changes.outputs.e2e_llm_d }}" == "true" ]] && profiles+=("llm-d")
          [[ "${{ needs.changes.outputs.e2e_routing_strategies }}" == "true" ]] && profiles+=("routing-strategies")
          [[ "${{ needs.changes.outputs.e2e_production_stack }}" == "true" ]] && profiles+=("production-stack")
          [[ "${{ needs.changes.outputs.e2e_dynamic_config }}" == "true" ]] && profiles+=("dynamic-config")
          [[ "${{ needs.changes.outputs.e2e_ml_model_selection }}" == "true" ]] && profiles+=("ml-model-selection")
          [[ "${{ needs.changes.outputs.e2e_multi_endpoint }}" == "true" ]] && profiles+=("multi-endpoint")
          [[ "${{ needs.changes.outputs.e2e_authz_rbac }}" == "true" ]] && profiles+=("authz-rbac")
          [[ "${{ needs.changes.outputs.e2e_streaming }}" == "true" ]] && profiles+=("streaming")

          # Convert to JSON array
          if [ ${#profiles[@]} -eq 0 ]; then
            echo 'profiles=[]' >> "$GITHUB_OUTPUT"
            echo 'should_run=false' >> "$GITHUB_OUTPUT"
            echo "No profile changes detected, skipping all e2e tests"
          else
            # Build `"a","b",` and strip the trailing comma when emitting
            printf -v json '"%s",' "${profiles[@]}"
            echo "profiles=[${json%,}]" >> "$GITHUB_OUTPUT"
            echo 'should_run=true' >> "$GITHUB_OUTPUT"
            echo "Running profiles: ${profiles[*]}"
          fi

  # Run the E2E suite once per selected profile on a Kind cluster.
  # Skipped when no profile was selected or when the pull request is a draft.
  integration-test:
    needs: [changes, determine-profiles]
    if: ${{ needs.determine-profiles.outputs.should_run == 'true' && !github.event.pull_request.draft }}
    runs-on: ubuntu-latest
    timeout-minutes: 90
    env:
      E2E_SEMANTIC_ROUTER_HELM_TIMEOUT: 60m
    strategy:
      fail-fast: false  # Continue testing other profiles even if one fails
      matrix:
        # Dynamic profile matrix based on detected changes
        profile: ${{ fromJson(needs.determine-profiles.outputs.profiles) }}
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Free disk space and relocate Docker to /mnt
        run: |
          echo "=== Before cleanup ==="
          df -h / /mnt
          # Remove large pre-installed toolchains that E2E tests don't need
          sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc \
            /usr/local/share/boost /usr/local/graalvm /usr/local/.ghcup \
            /usr/share/swift /usr/local/lib/node_modules 2>/dev/null || true
          sudo docker image prune -af 2>/dev/null || true
          # Move Docker data root to /mnt (75 GB+ free vs ~14 GB on /)
          sudo systemctl stop docker
          sudo mv /var/lib/docker /mnt/docker
          sudo ln -s /mnt/docker /var/lib/docker
          sudo systemctl start docker
          # Redirect temp dir so `kind load docker-image` tarballs land on /mnt
          sudo mkdir -p /mnt/tmp
          sudo chmod 1777 /mnt/tmp
          echo "=== After cleanup ==="
          df -h / /mnt

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'

      - name: Set up Rust
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: "1.90"

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            make \
            build-essential \
            pkg-config

      - name: Install Kind
        run: |
          # Map the kernel architecture name to the one used in Kind release assets
          ARCH="$(uname -m)"
          case "$ARCH" in
            x86_64) KIND_ARCH="amd64" ;;
            aarch64) KIND_ARCH="arm64" ;;
            *) echo "unsupported arch: $ARCH" && exit 1 ;;
          esac
          curl --retry 5 --retry-delay 5 --retry-all-errors -Lo ./kind "https://kind.sigs.k8s.io/dl/v0.22.0/kind-linux-${KIND_ARCH}"
          chmod +x ./kind
          sudo mv ./kind /usr/local/bin/kind

      - name: Pre-pull Kind node image
        run: |
          KIND_NODE_IMAGE="kindest/node:v1.29.2"
          echo "Pre-pulling ${KIND_NODE_IMAGE} with retries..."
          # Up to 5 attempts with a linearly growing delay (15s, 30s, 45s, 60s)
          for attempt in 1 2 3 4 5; do
            if docker pull "${KIND_NODE_IMAGE}"; then
              echo "Successfully pulled ${KIND_NODE_IMAGE}"
              break
            fi
            if [ "$attempt" -eq 5 ]; then
              echo "ERROR: Failed to pull ${KIND_NODE_IMAGE} after 5 attempts"
              exit 1
            fi
            echo "Pull attempt ${attempt} failed, retrying in $((attempt * 15))s..."
            sleep $((attempt * 15))
          done

      - name: Download E2E test dependencies
        run: |
          cd e2e && go mod download

      - name: Build E2E test binary
        run: |
          make build-e2e

      - name: Run Integration E2E tests (${{ matrix.profile }})
        id: e2e-test
        env:
          TMPDIR: /mnt/tmp
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}
          # Passed through the environment instead of being interpolated
          # into the script text.
          MATRIX_PROFILE: ${{ matrix.profile }}
        run: |
          # Stream router logs in the background for the duration of the tests
          bash ./e2e/testing/stream_semantic_router_logs.sh &
          ROUTER_LOG_STREAM_PID=$!
          cleanup_router_log_stream() {
            if kill -0 "${ROUTER_LOG_STREAM_PID}" 2>/dev/null; then
              echo "[router-live] stopping background router log stream (pid ${ROUTER_LOG_STREAM_PID})"
              kill "${ROUTER_LOG_STREAM_PID}" 2>/dev/null || true
              wait "${ROUTER_LOG_STREAM_PID}" 2>/dev/null || true
            fi
          }
          trap cleanup_router_log_stream EXIT

          make_args=(e2e-test "E2E_PROFILE=${MATRIX_PROFILE}" E2E_VERBOSE=true E2E_KEEP_CLUSTER=false)
          if [ "${MATRIX_PROFILE}" = "kubernetes" ]; then
            # Temporarily skip the stress / pressure coverage until the suite is stable again.
            KUBERNETES_CI_TESTS="chat-completions-request,apiserver-runtime-config-endpoints,domain-classify,semantic-cache,pii-detection,jailbreak-detection,decision-priority-selection,plugin-chain-execution,rule-condition-logic,decision-fallback-behavior,plugin-config-variations"
            make_args+=("E2E_TESTS=${KUBERNETES_CI_TESTS}")
          fi

          # Disable errexit around make so the exit code can be recorded as a
          # step output before the step itself fails.
          set +e
          make "${make_args[@]}"
          TEST_EXIT_CODE=$?
          set -e
          echo "test_exit_code=${TEST_EXIT_CODE}" >> "$GITHUB_OUTPUT"
          exit "${TEST_EXIT_CODE}"

      - name: Collect logs via kubectl (fallback)
        if: always()
        run: |
          # Only collect when the log file does not already exist
          if [ ! -f "semantic-router-logs.txt" ]; then
            echo "⚠️ semantic-router-logs.txt not found, collecting logs via kubectl as fallback..."
            echo "========================================" > semantic-router-logs.txt
            echo "Semantic Router Logs (collected via kubectl fallback)" >> semantic-router-logs.txt
            echo "========================================" >> semantic-router-logs.txt
            echo "" >> semantic-router-logs.txt
            for pod in $(kubectl get pods -n vllm-semantic-router-system -o jsonpath='{.items[*].metadata.name}' 2>/dev/null); do
              echo "=== Pod: $pod ===" >> semantic-router-logs.txt
              kubectl describe pod "$pod" -n vllm-semantic-router-system >> semantic-router-logs.txt 2>&1 || true
              echo "--- Logs ---" >> semantic-router-logs.txt
              kubectl logs "$pod" -n vllm-semantic-router-system --all-containers=true >> semantic-router-logs.txt 2>&1 || true
              echo "" >> semantic-router-logs.txt
            done
            echo "✅ Fallback log collection complete"
          else
            echo "✅ semantic-router-logs.txt already exists (collected by Go framework)"
          fi

      - name: Upload test reports
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-reports-${{ matrix.profile }}
          path: |
            test-report.json
            test-report.md
            semantic-router-logs.txt
            response-api-artifacts/**
          retention-days: 30

      - name: Create test summary from report
        if: always()
        env:
          MATRIX_PROFILE: ${{ matrix.profile }}
        run: |
          # Run metadata is read from the runner-provided GITHUB_* variables
          # rather than interpolated into the script text.
          if [ -f "test-report.md" ]; then
            echo "=== Reading test report from test-report.md ==="
            cat test-report.md >> "$GITHUB_STEP_SUMMARY"

            # Add semantic-router logs section if available
            if [ -f "semantic-router-logs.txt" ]; then
              {
                printf '\n---\n\n### 📝 Semantic Router Logs\n\n<details>\n<summary>Click to view semantic-router logs</summary>\n\n```\n'
              } >> "$GITHUB_STEP_SUMMARY"

              # Add first 500 lines of logs to summary (to avoid exceeding GitHub limits)
              head -n 500 semantic-router-logs.txt >> "$GITHUB_STEP_SUMMARY"

              # Check if there are more lines
              TOTAL_LINES=$(wc -l < semantic-router-logs.txt)
              if [ "$TOTAL_LINES" -gt 500 ]; then
                {
                  printf '\n... (showing first 500 lines of %s total lines)\n\n' "$TOTAL_LINES"
                  printf '📦 Full logs are available in the workflow artifacts: semantic-router-logs.txt\n'
                } >> "$GITHUB_STEP_SUMMARY"
              fi

              {
                printf '```\n\n</details>\n'
              } >> "$GITHUB_STEP_SUMMARY"
            fi

            # Add additional context
            {
              printf '\n---\n\n### 📚 Additional Resources\n\n'
              printf -- '- **Profile:** `%s`\n' "${MATRIX_PROFILE}"
              printf -- '- **Trigger:** %s\n' "${GITHUB_EVENT_NAME}"
              printf -- '- **Branch:** `%s`\n' "${GITHUB_REF_NAME}"
              printf -- '- **Commit:** `%s`\n' "${GITHUB_SHA}"
              printf -- '- **Workflow Run:** [%s](%s/%s/actions/runs/%s)\n' "${GITHUB_RUN_ID}" "${GITHUB_SERVER_URL}" "${GITHUB_REPOSITORY}" "${GITHUB_RUN_ID}"
              printf -- '- [E2E Test Framework Documentation](https://github.com/%s/tree/main/e2e)\n' "${GITHUB_REPOSITORY}"
              printf -- '- [%s Profile](https://github.com/%s/tree/main/e2e/profiles/%s)\n' "${MATRIX_PROFILE}" "${GITHUB_REPOSITORY}" "${MATRIX_PROFILE}"
              printf '\n### 📦 Artifacts\n\n'
              printf -- '- **test-report.json** - Detailed test results in JSON format\n'
              printf -- '- **test-report.md** - Human-readable test report\n'
              printf -- '- **semantic-router-logs.txt** - Complete semantic-router pod logs\n'
              printf -- '- All artifacts are retained for 30 days as `test-reports-%s`\n' "${MATRIX_PROFILE}"
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "⚠️ Test report file not found!" >> "$GITHUB_STEP_SUMMARY"
            echo "" >> "$GITHUB_STEP_SUMMARY"
            echo "The E2E test framework did not generate a report file." >> "$GITHUB_STEP_SUMMARY"
            echo "This might indicate that the test failed before report generation." >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Clean up
        if: always()
        run: |
          # Best-effort teardown; a cleanup failure must not fail the job
          make e2e-cleanup || true