# Page text captured from the GitHub Actions web UI along with this file:
# "Skip to content"; "Ci test"; "Ci test #30".

# Display name of this workflow.
name: Complete Testing Pipeline

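# Annotation captured from the GitHub Actions UI for this file:
#   Check failure on line 1 in .github/workflows/complete-testing.yml
#   (View workflow run for this annotation)
#   GitHub Actions / .github/workflows/complete-testing.yml
#   Invalid workflow file
#   (Line: 95, Col: 19): Unrecognized named-value: 'runner'.
#   Located at position 1 within expression: runner.temp
# Cause: the `runner` context is not available inside a job-level `env:` block.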
# Events that start this workflow.
on:
  # Pushes to main and to any branch matching release-*.
  push:
    branches:
      - main
      - 'release-*'
  # Pull requests that target main.
  pull_request:
    branches:
      - main
  schedule:
    # Nightly at 2 AM UTC
    - cron: '0 2 * * *'
  # Manual runs; the caller chooses whether the chaos suite is included.
  workflow_dispatch:
    inputs:
      run_chaos_tests:
        description: 'Run chaos tests'
        type: choice
        options:
          - 'true'
          - 'false'
        default: 'false'
        required: false
# Workflow-wide tool versions, read by the jobs through the `env` context.
# Quoted so that each value stays a string.
env:
  GO_VERSION: "1.23"
  KIND_VERSION: "v0.26.0"
  CHAOS_MESH_VERSION: "latest"
# Job definitions.
# NOTE(review): this copy of the file has lost its indentation; every job key
# that follows is meant to be nested one level under `jobs:`.
jobs:
# Unit tests: runs the two make test targets and publishes coverage.out
# as the `unit-test-coverage` artifact.
unit-tests:
  name: Unit Tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # Go toolchain version comes from the workflow-level GO_VERSION.
    - uses: actions/setup-go@v5
      name: Set up Go
      with:
        go-version: "${{ env.GO_VERSION }}"

    # libzmq development files and pkg-config, installed before the make
    # targets run.
    - name: Install ZMQ
      run: |
        sudo apt-get update
        sudo apt-get install -y libzmq3-dev pkg-config

    - name: Run unit tests
      run: |
        make test-zmq-coverage
        make test-kv-sync

    - uses: actions/upload-artifact@v4
      name: Upload coverage
      with:
        path: coverage.out
        name: unit-test-coverage
# Integration tests for the kv_event_sync suite.
# The suite runs exactly once; its output is captured to integration-test.log,
# which the report step converts to JUnit XML and uploads as the
# `integration-test-report` artifact.
integration-tests:
  name: Integration Tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}

    - name: Install ZMQ
      run: |
        sudo apt-get update
        sudo apt-get install -y libzmq3-dev pkg-config

    - name: Run integration tests
      run: |
        # pipefail ties the step's exit status to `go test` rather than `tee`.
        set -o pipefail
        # Run only the kv_event_sync tests (exclude webhook tests that need kubebuilder)
        go test -v -tags="zmq" github.com/vllm-project/aibrix/test/integration -run "^Test(PodLifecycle|Configuration|EventFlow|Concurrent|MetricsUpdate|ErrorHandling)" 2>&1 | tee integration-test.log

    # Builds the JUnit report from the captured log instead of executing the
    # whole suite a second time. Runs even when the tests failed so that the
    # failures are reported; the test step above already carries the job status.
    - name: Generate test report
      if: always()
      run: |
        if [ ! -s integration-test.log ]; then
          echo "No integration test log found; skipping report generation."
          exit 0
        fi
        go install github.com/jstemmer/go-junit-report/v2@latest
        go-junit-report < integration-test.log > integration-test-report.xml

    - name: Upload test report
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: integration-test-report
        path: integration-test-report.xml
# End-to-end tests: creates a Kind cluster, builds and loads the AIBrix images,
# deploys AIBrix plus a mock vLLM workload, port-forwards the services and runs
# the TestKVSync e2e tests. The cluster is deleted at the end in every case.
e2e-tests:
  name: E2E Tests
  runs-on: ubuntu-latest
  timeout-minutes: 60
  env:
    # Job-level overrides of the workflow-wide versions; quoted so that they
    # stay strings.
    GO_VERSION: '1.21'
    KIND_VERSION: 'v0.21.0'
  steps:
    - uses: actions/checkout@v4

    # The `runner` context cannot be referenced from a job-level `env:` block
    # (that produced "Unrecognized named-value: 'runner'"). The path is built
    # from the RUNNER_TEMP variable inside a step instead; writing it to
    # GITHUB_ENV exposes KUBECONFIG to every later step of this job, so the
    # steps below do not need their own `env:` entry for it.
    - name: Configure kubeconfig path
      run: echo "KUBECONFIG=${RUNNER_TEMP}/kubeconfig" >> "$GITHUB_ENV"

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}

    - name: Install Kind
      run: |
        curl -Lo ./kind https://kind.sigs.k8s.io/dl/${{ env.KIND_VERSION }}/kind-linux-amd64
        chmod +x ./kind
        sudo mv ./kind /usr/local/bin/kind

    - name: Create Kind cluster
      run: |
        kind create cluster --config development/vllm/kind-config.yaml --wait 120s
        kubectl cluster-info

    - name: Install ZMQ
      run: |
        sudo apt-get update
        sudo apt-get install -y libzmq3-dev pkg-config

    - name: 'Cleanup Docker (avoid "no space left on device")'
      run: docker system prune -af --volumes

    - name: Build and load images
      run: |
        make docker-build-all
        for image in controller-manager gateway-plugins metadata-service runtime kvcache-watcher; do
          echo "Loading $image image..."
          kind load docker-image aibrix/$image:nightly --nodes kind-worker
        done

    - name: Deploy AIBrix
      run: |
        kubectl apply -k config/dependency --server-side
        kubectl apply -k config/test
        echo "Waiting for Redis..."
        kubectl wait --for=condition=ready pod -l app=redis,role=master -n aibrix-system --timeout=300s
        echo "Waiting for controller-manager..."
        kubectl wait --for=condition=ready pod -l control-plane=controller-manager -n aibrix-system --timeout=300s
        echo "Pod status:"
        kubectl get pods -n aibrix-system

    - name: Build mock vLLM image
      working-directory: development/app
      run: |
        docker build -t aibrix/vllm-mock:nightly -f Dockerfile .
        kind load docker-image aibrix/vllm-mock:nightly --nodes kind-worker

    - name: Deploy mock workload
      working-directory: development/app
      run: |
        kubectl apply -k config/mock
        kubectl wait --for=condition=ready pod -l app=mock-llama2-7b --timeout=300s

    # The port-forwards are started in the background and their output goes to
    # /tmp/*-pf.log; only the Redis forward is probed before continuing.
    - name: Setup port-forwarding
      run: |
        kubectl port-forward svc/llama2-7b 8000:8000 >/tmp/llama-pf.log 2>&1 &
        kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 >/tmp/envoy-pf.log 2>&1 &
        kubectl -n aibrix-system port-forward service/aibrix-redis-master 6379:6379 >/tmp/redis-pf.log 2>&1 &
        sleep 10
        echo "Testing Redis connectivity..."
        nc -zv 127.0.0.1 6379 || { echo "Cannot connect to Redis"; cat /tmp/redis-pf.log || true; exit 1; }

    - name: Run E2E tests
      run: |
        go test -v -tags="zmq" ./test/e2e -run "TestKVSync" -timeout 30m

    # Best-effort diagnostics: each command tolerates failure so that one
    # missing resource does not prevent the remaining output from being shown.
    - name: Collect logs on failure
      if: failure()
      run: |
        kubectl get pods --all-namespaces || true
        kubectl logs -n aibrix-system -l app.kubernetes.io/name=controller-manager --tail=100 || true
        kubectl logs -n kv-sync-test --all-containers=true --tail=100 || true

    - name: Cleanup
      if: always()
      run: kind delete cluster
# Performance benchmarks: runs only for scheduled and manually dispatched runs.
# Executes the Go benchmarks in test/benchmark, compares them against a
# baseline when one is available, and uploads the results.
performance-tests:
  name: Performance Benchmarks
  runs-on: ubuntu-latest
  if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
  steps:
    - uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}

    - name: Install ZMQ
      run: |
        sudo apt-get update
        sudo apt-get install -y libzmq3-dev pkg-config

    # The baseline lands in its own directory so that the fresh
    # benchmark-results.txt written below cannot overwrite it.
    # NOTE(review): download-artifact@v4 looks in the current workflow run
    # unless `run-id` and `github-token` are supplied - confirm how a baseline
    # from an earlier run is expected to reach this step.
    - name: Download baseline metrics
      uses: actions/download-artifact@v4
      with:
        name: baseline-metrics
        path: test/benchmark/baseline/
      continue-on-error: true

    - name: Run performance benchmarks
      working-directory: test/benchmark
      run: |
        # pipefail ties the step's exit status to `go test` rather than `tee`.
        set -o pipefail
        go test -bench=. -benchmem -benchtime=10s -count=3 -cpu=1,2,4 -tags="zmq" | tee benchmark-results.txt

    # The baseline artifact is uploaded from benchmark-results.txt, so that is
    # the file name it has after download.
    - name: Generate benchmark report
      working-directory: test/benchmark
      run: |
        go install golang.org/x/perf/cmd/benchstat@latest
        if [ -f baseline/benchmark-results.txt ]; then
          benchstat baseline/benchmark-results.txt benchmark-results.txt > benchmark-comparison.txt || true
        else
          echo "No baseline metrics available; comparison skipped." > benchmark-comparison.txt
        fi

    # `go run` needs a source file argument; it does not read a program from
    # stdin. The placeholder check is written to a scratch directory outside
    # the repository and run from there.
    - name: Check for performance regression
      run: |
        check_dir="$(mktemp -d)"
        cat > "${check_dir}/main.go" <<'EOF'
        package main

        import "fmt"

        func main() {
            // Simple regression check - enhance as needed
            fmt.Println("Performance regression check passed")
        }
        EOF
        go run "${check_dir}/main.go"

    - name: Upload benchmark results
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-${{ github.sha }}
        path: |
          test/benchmark/benchmark-results.txt
          test/benchmark/benchmark-comparison.txt

    # Publishes this run's results as the next baseline, for runs on main only.
    - name: Update baseline metrics
      if: github.ref == 'refs/heads/main'
      uses: actions/upload-artifact@v4
      with:
        name: baseline-metrics
        path: test/benchmark/benchmark-results.txt
# Chaos tests: run on the nightly schedule, or on manual dispatch when the
# run_chaos_tests input is 'true'. Uses a dedicated Kind cluster named
# chaos-test with Chaos Mesh installed, and deletes that cluster afterwards.
chaos-tests:
  name: Chaos Tests
  runs-on: ubuntu-latest
  if: |
    github.event_name == 'schedule' ||
    (github.event_name == 'workflow_dispatch' && github.event.inputs.run_chaos_tests == 'true')
  steps:
    - uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}

    - name: Install Kind
      run: |
        curl -Lo ./kind https://kind.sigs.k8s.io/dl/${{ env.KIND_VERSION }}/kind-linux-amd64
        chmod +x ./kind
        sudo mv ./kind /usr/local/bin/kind

    - name: Create Kind cluster
      run: |
        kind create cluster --config development/vllm/kind-config.yaml --name chaos-test
        kubectl cluster-info

    - name: Install Chaos Mesh
      run: |
        curl -sSL https://mirrors.chaos-mesh.org/${{ env.CHAOS_MESH_VERSION }}/install.sh | bash
        kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=controller-manager -n chaos-mesh --timeout=300s

    - name: Install ZMQ
      run: |
        sudo apt-get update
        sudo apt-get install -y libzmq3-dev pkg-config

    - name: Build and deploy test environment
      run: |
        make docker-build-all
        # The mock vLLM image is loaded below, so it is built here first, with
        # the same Dockerfile and context the E2E job uses.
        # NOTE(review): assumes `make docker-build-all` does not already
        # produce aibrix/vllm-mock:nightly - confirm against the Makefile.
        docker build -t aibrix/vllm-mock:nightly -f development/app/Dockerfile development/app
        kind load docker-image aibrix/controller-manager:nightly --name chaos-test
        kind load docker-image aibrix/gateway-plugins:nightly --name chaos-test
        kind load docker-image aibrix/vllm-mock:nightly --name chaos-test
        kubectl apply -k config/dependency --server-side
        kubectl apply -k config/test

    - name: Run chaos tests
      run: |
        export KUBECONFIG="${HOME}/.kube/config"
        go test -v ./test/chaos/ -timeout 45m

    # Best-effort diagnostics: each command tolerates failure so that the
    # controller logs are still printed when the first query fails.
    # NOTE(review): `chaosengine` looks like a LitmusChaos resource name rather
    # than a Chaos Mesh one - confirm which resource kinds the tests create.
    - name: Collect chaos test results
      if: always()
      run: |
        kubectl get chaosengine --all-namespaces || true
        kubectl logs -n chaos-mesh -l app.kubernetes.io/component=controller-manager --tail=100 || true

    - name: Cleanup
      if: always()
      run: |
        kind delete cluster --name chaos-test
# Consolidated report: runs after the unit, integration and E2E jobs regardless
# of their outcome, writes test-report.md from the job results and downloaded
# artifacts, uploads it, and posts it as a comment on pull requests.
test-report:
  name: Generate Test Report
  runs-on: ubuntu-latest
  needs: [unit-tests, integration-tests, e2e-tests]
  if: always()
  # Explicit token scopes: read access for checkout, write access for the
  # pull-request comment posted by the last step.
  # NOTE(review): pull requests from forks receive a read-only token, so the
  # comment step can still fail for them - confirm whether that matters here.
  permissions:
    contents: read
    issues: write
    pull-requests: write
  steps:
    - uses: actions/checkout@v4

    - name: Download test artifacts
      uses: actions/download-artifact@v4
      with:
        path: test-artifacts

    # Pass/fail marks come from the upstream job results rather than from the
    # mere presence of an artifact: the integration report is uploaded even
    # when its tests fail, so file existence does not indicate success.
    - name: Generate consolidated report
      env:
        UNIT_RESULT: ${{ needs.unit-tests.result }}
        INTEGRATION_RESULT: ${{ needs.integration-tests.result }}
        E2E_RESULT: ${{ needs.e2e-tests.result }}
      run: |
        {
          echo "# Test Results Summary"
          echo ""
          echo "## Unit Tests"
          if [ "$UNIT_RESULT" = "success" ]; then
            echo "✅ Unit tests completed"
          else
            echo "❌ Unit tests failed or did not produce coverage"
          fi
          if [ -f test-artifacts/unit-test-coverage/coverage.out ]; then
            # Total coverage line; tolerated if the profile cannot be processed.
            go tool cover -func=test-artifacts/unit-test-coverage/coverage.out | tail -1 || true
          fi
          echo ""
          echo "## Integration Tests"
          if [ "$INTEGRATION_RESULT" = "success" ]; then
            echo "✅ Integration tests completed"
          else
            echo "❌ Integration tests failed"
          fi
          echo ""
          echo "## E2E Tests"
          if [ "$E2E_RESULT" = "success" ]; then
            echo "✅ E2E tests completed"
          else
            echo "❌ E2E tests did not succeed (job result: ${E2E_RESULT})"
          fi
          echo ""
        } > test-report.md
        cat test-report.md

    - name: Upload test report
      uses: actions/upload-artifact@v4
      with:
        name: test-report
        path: test-report.md

    # The API call is awaited so that a rejected request fails this step
    # instead of being dropped.
    - name: Comment PR with test results
      if: github.event_name == 'pull_request'
      uses: actions/github-script@v7
      with:
        script: |
          const fs = require('fs');
          const report = fs.readFileSync('test-report.md', 'utf8');
          await github.rest.issues.createComment({
            issue_number: context.issue.number,
            owner: context.repo.owner,
            repo: context.repo.repo,
            body: report
          });
# Nightly notification: runs only for scheduled runs, after all five test jobs
# have finished, whatever their outcome.
notify-results:
  name: Notify Results
  runs-on: ubuntu-latest
  needs:
    - unit-tests
    - integration-tests
    - e2e-tests
    - performance-tests
    - chaos-tests
  if: always() && github.event_name == 'schedule'
  steps:
    # Sets the `status` output to failure when any of the unit, integration or
    # E2E jobs ended with a result other than success.
    - id: status
      name: Determine status
      run: |
        overall=success
        for result in "${{ needs.unit-tests.result }}" "${{ needs.integration-tests.result }}" "${{ needs.e2e-tests.result }}"; do
          if [ "$result" != "success" ]; then
            overall=failure
          fi
        done
        echo "status=${overall}" >> $GITHUB_OUTPUT

    - name: Notify on failure
      if: steps.status.outputs.status == 'failure'
      run: |
        echo "Tests failed! Notification would be sent here."
        # Add Slack/email notification logic here