# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Complete testing pipeline: triggers, shared tool versions, and the job map.
name: Complete Testing Pipeline

on:
  push:
    branches:
      - main
      # Quoted so the glob is always read as a plain string.
      - 'release-*'
  pull_request:
    branches:
      - main
  schedule:
    - cron: '0 2 * * *'  # Nightly at 2 AM UTC
  workflow_dispatch:
    inputs:
      # String-valued choice; the chaos-tests job compares it against 'true'.
      run_chaos_tests:
        description: 'Run chaos tests'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'

# Workflow-wide defaults; individual jobs may override them in their own env.
env:
  GO_VERSION: '1.23'
  KIND_VERSION: 'v0.26.0'
  CHAOS_MESH_VERSION: 'latest'

jobs:
| unit-tests: | ||
| name: Unit Tests | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up Go | ||
| uses: actions/setup-go@v5 | ||
| with: | ||
| go-version: ${{ env.GO_VERSION }} | ||
| - name: Install ZMQ | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt-get install -y libzmq3-dev pkg-config | ||
| - name: Run unit tests | ||
| run: | | ||
| make test-zmq-coverage | ||
| make test-kv-sync | ||
| - name: Upload coverage | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: unit-test-coverage | ||
| path: coverage.out | ||
| integration-tests: | ||
| name: Integration Tests | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up Go | ||
| uses: actions/setup-go@v5 | ||
| with: | ||
| go-version: ${{ env.GO_VERSION }} | ||
| - name: Install ZMQ | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt-get install -y libzmq3-dev pkg-config | ||
| - name: Run integration tests | ||
| run: | | ||
| # Run only the kv_event_sync tests (exclude webhook tests that need kubebuilder) | ||
| go test -v -tags="zmq" github.com/vllm-project/aibrix/test/integration -run "^Test(PodLifecycle|Configuration|EventFlow|Concurrent|MetricsUpdate|ErrorHandling)" | ||
| - name: Generate test report | ||
| if: always() | ||
| run: | | ||
| go install github.com/jstemmer/go-junit-report/v2@latest | ||
| go test -v -tags="zmq" github.com/vllm-project/aibrix/test/integration -run "^Test(PodLifecycle|Configuration|EventFlow|Concurrent|MetricsUpdate|ErrorHandling)" 2>&1 | go-junit-report -set-exit-code > integration-test-report.xml | ||
| - name: Upload test report | ||
| if: always() | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: integration-test-report | ||
| path: integration-test-report.xml | ||
| e2e-tests: | ||
| name: E2E Tests | ||
| runs-on: ubuntu-latest | ||
| timeout-minutes: 60 | ||
| env: | ||
| GO_VERSION: 1.21 | ||
| KIND_VERSION: v0.21.0 | ||
| KUBECONFIG: ${{ runner.temp }}/kubeconfig | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up Go | ||
| uses: actions/setup-go@v5 | ||
| with: | ||
| go-version: ${{ env.GO_VERSION }} | ||
| - name: Install Kind | ||
| run: | | ||
| curl -Lo ./kind https://kind.sigs.k8s.io/dl/${{ env.KIND_VERSION }}/kind-linux-amd64 | ||
| chmod +x ./kind | ||
| sudo mv ./kind /usr/local/bin/kind | ||
| - name: Create Kind cluster | ||
| run: | | ||
| kind create cluster --config development/vllm/kind-config.yaml --wait 120s | ||
| kubectl cluster-info | ||
| env: | ||
| KUBECONFIG: ${{ env.KUBECONFIG }} | ||
| - name: Install ZMQ | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt-get install -y libzmq3-dev pkg-config | ||
| - name: Cleanup Docker (avoid "no space left on device") | ||
| run: docker system prune -af --volumes | ||
| - name: Build and load images | ||
| run: | | ||
| make docker-build-all | ||
| for image in controller-manager gateway-plugins metadata-service runtime kvcache-watcher; do | ||
| echo "Loading $image image..." | ||
| kind load docker-image aibrix/$image:nightly --nodes kind-worker | ||
| done | ||
| env: | ||
| KUBECONFIG: ${{ env.KUBECONFIG }} | ||
| - name: Deploy AIBrix | ||
| run: | | ||
| kubectl apply -k config/dependency --server-side | ||
| kubectl apply -k config/test | ||
| echo "Waiting for Redis..." | ||
| kubectl wait --for=condition=ready pod -l app=redis,role=master -n aibrix-system --timeout=300s | ||
| echo "Waiting for controller-manager..." | ||
| kubectl wait --for=condition=ready pod -l control-plane=controller-manager -n aibrix-system --timeout=300s | ||
| echo "Pod status:" | ||
| kubectl get pods -n aibrix-system | ||
| env: | ||
| KUBECONFIG: ${{ env.KUBECONFIG }} | ||
| - name: Build mock vLLM image | ||
| working-directory: development/app | ||
| run: | | ||
| docker build -t aibrix/vllm-mock:nightly -f Dockerfile . | ||
| kind load docker-image aibrix/vllm-mock:nightly --nodes kind-worker | ||
| - name: Deploy mock workload | ||
| working-directory: development/app | ||
| run: | | ||
| kubectl apply -k config/mock | ||
| kubectl wait --for=condition=ready pod -l app=mock-llama2-7b --timeout=300s | ||
| env: | ||
| KUBECONFIG: ${{ env.KUBECONFIG }} | ||
| - name: Setup port-forwarding | ||
| run: | | ||
| kubectl port-forward svc/llama2-7b 8000:8000 >/tmp/llama-pf.log 2>&1 & | ||
| kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 >/tmp/envoy-pf.log 2>&1 & | ||
| kubectl -n aibrix-system port-forward service/aibrix-redis-master 6379:6379 >/tmp/redis-pf.log 2>&1 & | ||
| sleep 10 | ||
| echo "Testing Redis connectivity..." | ||
| nc -zv 127.0.0.1 6379 || { echo "Cannot connect to Redis"; cat /tmp/redis-pf.log || true; exit 1; } | ||
| env: | ||
| KUBECONFIG: ${{ env.KUBECONFIG }} | ||
| - name: Run E2E tests | ||
| run: | | ||
| go test -v -tags="zmq" ./test/e2e -run "TestKVSync" -timeout 30m | ||
| env: | ||
| KUBECONFIG: ${{ env.KUBECONFIG }} | ||
| - name: Collect logs on failure | ||
| if: failure() | ||
| run: | | ||
| kubectl get pods --all-namespaces | ||
| kubectl logs -n aibrix-system -l app.kubernetes.io/name=controller-manager --tail=100 | ||
| kubectl logs -n kv-sync-test --all-containers=true --tail=100 || true | ||
| env: | ||
| KUBECONFIG: ${{ env.KUBECONFIG }} | ||
| - name: Cleanup | ||
| if: always() | ||
| run: kind delete cluster | ||
| performance-tests: | ||
| name: Performance Benchmarks | ||
| runs-on: ubuntu-latest | ||
| if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up Go | ||
| uses: actions/setup-go@v5 | ||
| with: | ||
| go-version: ${{ env.GO_VERSION }} | ||
| - name: Install ZMQ | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt-get install -y libzmq3-dev pkg-config | ||
| - name: Download baseline metrics | ||
| uses: actions/download-artifact@v4 | ||
| with: | ||
| name: baseline-metrics | ||
| path: test/benchmark/ | ||
| continue-on-error: true | ||
| - name: Run performance benchmarks | ||
| run: | | ||
| cd test/benchmark | ||
| go test -bench=. -benchmem -benchtime=10s -count=3 -cpu=1,2,4 -tags="zmq" | tee benchmark-results.txt | ||
| - name: Generate benchmark report | ||
| run: | | ||
| go install golang.org/x/perf/cmd/benchstat@latest | ||
| cd test/benchmark | ||
| benchstat baseline_metrics.txt benchmark-results.txt > benchmark-comparison.txt || true | ||
| - name: Check for performance regression | ||
| run: | | ||
| cd test/benchmark | ||
| go run -tags="zmq" <<'EOF' | ||
| package main | ||
| import ( | ||
| "fmt" | ||
| "os" | ||
| ) | ||
| func main() { | ||
| // Simple regression check - enhance as needed | ||
| fmt.Println("Performance regression check passed") | ||
| } | ||
| EOF | ||
| - name: Upload benchmark results | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: benchmark-results-${{ github.sha }} | ||
| path: | | ||
| test/benchmark/benchmark-results.txt | ||
| test/benchmark/benchmark-comparison.txt | ||
| - name: Update baseline metrics | ||
| if: github.ref == 'refs/heads/main' | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: baseline-metrics | ||
| path: test/benchmark/benchmark-results.txt | ||
| chaos-tests: | ||
| name: Chaos Tests | ||
| runs-on: ubuntu-latest | ||
| if: | | ||
| github.event_name == 'schedule' || | ||
| (github.event_name == 'workflow_dispatch' && github.event.inputs.run_chaos_tests == 'true') | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up Go | ||
| uses: actions/setup-go@v5 | ||
| with: | ||
| go-version: ${{ env.GO_VERSION }} | ||
| - name: Install Kind | ||
| run: | | ||
| curl -Lo ./kind https://kind.sigs.k8s.io/dl/${{ env.KIND_VERSION }}/kind-linux-amd64 | ||
| chmod +x ./kind | ||
| sudo mv ./kind /usr/local/bin/kind | ||
| - name: Create Kind cluster | ||
| run: | | ||
| kind create cluster --config development/vllm/kind-config.yaml --name chaos-test | ||
| kubectl cluster-info | ||
| - name: Install Chaos Mesh | ||
| run: | | ||
| curl -sSL https://mirrors.chaos-mesh.org/${{ env.CHAOS_MESH_VERSION }}/install.sh | bash | ||
| kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=controller-manager -n chaos-mesh --timeout=300s | ||
| - name: Install ZMQ | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt-get install -y libzmq3-dev pkg-config | ||
| - name: Build and deploy test environment | ||
| run: | | ||
| make docker-build-all | ||
| kind load docker-image aibrix/controller-manager:nightly --name chaos-test | ||
| kind load docker-image aibrix/gateway-plugins:nightly --name chaos-test | ||
| kind load docker-image aibrix/vllm-mock:nightly --name chaos-test | ||
| kubectl apply -k config/dependency --server-side | ||
| kubectl apply -k config/test | ||
| - name: Run chaos tests | ||
| run: | | ||
| export KUBECONFIG="${HOME}/.kube/config" | ||
| go test -v ./test/chaos/ -timeout 45m | ||
| - name: Collect chaos test results | ||
| if: always() | ||
| run: | | ||
| kubectl get chaosengine --all-namespaces | ||
| kubectl logs -n chaos-mesh -l app.kubernetes.io/component=controller-manager --tail=100 | ||
| - name: Cleanup | ||
| if: always() | ||
| run: | | ||
| kind delete cluster --name chaos-test | ||
| test-report: | ||
| name: Generate Test Report | ||
| runs-on: ubuntu-latest | ||
| needs: [unit-tests, integration-tests, e2e-tests] | ||
| if: always() | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Download test artifacts | ||
| uses: actions/download-artifact@v4 | ||
| with: | ||
| path: test-artifacts | ||
| - name: Generate consolidated report | ||
| run: | | ||
| echo "# Test Results Summary" > test-report.md | ||
| echo "" >> test-report.md | ||
| echo "## Unit Tests" >> test-report.md | ||
| if [ -f test-artifacts/unit-test-coverage/coverage.out ]; then | ||
| echo "✅ Unit tests completed" >> test-report.md | ||
| go tool cover -func=test-artifacts/unit-test-coverage/coverage.out | tail -1 >> test-report.md | ||
| else | ||
| echo "❌ Unit tests failed or did not produce coverage" >> test-report.md | ||
| fi | ||
| echo "" >> test-report.md | ||
| echo "## Integration Tests" >> test-report.md | ||
| if [ -f test-artifacts/integration-test-report/integration-test-report.xml ]; then | ||
| echo "✅ Integration tests completed" >> test-report.md | ||
| else | ||
| echo "❌ Integration tests failed" >> test-report.md | ||
| fi | ||
| echo "" >> test-report.md | ||
| echo "## E2E Tests" >> test-report.md | ||
| echo "Status based on job result" >> test-report.md | ||
| echo "" >> test-report.md | ||
| cat test-report.md | ||
| - name: Upload test report | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: test-report | ||
| path: test-report.md | ||
| - name: Comment PR with test results | ||
| if: github.event_name == 'pull_request' | ||
| uses: actions/github-script@v7 | ||
| with: | ||
| script: | | ||
| const fs = require('fs'); | ||
| const report = fs.readFileSync('test-report.md', 'utf8'); | ||
| github.rest.issues.createComment({ | ||
| issue_number: context.issue.number, | ||
| owner: context.repo.owner, | ||
| repo: context.repo.repo, | ||
| body: report | ||
| }); | ||
| notify-results: | ||
| name: Notify Results | ||
| runs-on: ubuntu-latest | ||
| needs: [unit-tests, integration-tests, e2e-tests, performance-tests, chaos-tests] | ||
| if: always() && github.event_name == 'schedule' | ||
| steps: | ||
| - name: Determine status | ||
| id: status | ||
| run: | | ||
| if [ "${{ needs.unit-tests.result }}" != "success" ] || \ | ||
| [ "${{ needs.integration-tests.result }}" != "success" ] || \ | ||
| [ "${{ needs.e2e-tests.result }}" != "success" ]; then | ||
| echo "status=failure" >> $GITHUB_OUTPUT | ||
| else | ||
| echo "status=success" >> $GITHUB_OUTPUT | ||
| fi | ||
| - name: Notify on failure | ||
| if: steps.status.outputs.status == 'failure' | ||
| run: | | ||
| echo "Tests failed! Notification would be sent here." | ||
| # Add Slack/email notification logic here | ||