airunway/.github/workflows/e2e-controller.yml at 6dde3cd140a45c98653984c81c38ba6c3595c39b · kaito-project/airunway · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# End-to-end test for the controller: the job below brings up a kind cluster,
# installs KAITO, deploys the controller and the KAITO provider, creates a
# CPU-only ModelDeployment and sends a request to its inference endpoint.
name: E2E Controller Tests

# Triggers: pushes to main, pull requests targeting main, and manual runs.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# The workflow token only gets read access to repository contents.
permissions:
  contents: read

jobs:
  e2e-controller:
    runs-on: ubuntu-latest-16-cores
    # Upper bound, in minutes, for the whole job.
    timeout-minutes: 30

    steps:
      # Fetch the repository so later steps can use its Makefile targets and
      # test fixtures. Every `uses:` below is pinned to a full commit SHA, with
      # the corresponding release tag recorded in the trailing comment.
      - name: Checkout repository
        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0

      # Go toolchain; the "Setup Kind" step relies on it for `go install`.
      - name: Setup Go
        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          # Quoted so YAML keeps the version a string instead of a float.
          go-version: "1.25"
          # Dependency file handed to setup-go for its cache.
          cache-dependency-path: controller/go.sum

      # Bun runtime.
      # NOTE(review): `latest` floats with upstream releases, unlike the
      # SHA-pinned actions above — consider pinning a version for
      # reproducible runs.
      - name: Setup Bun
        uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
        with:
          bun-version: latest

      # Install the kind CLI with the Go toolchain, then create the
      # `airunway-e2e` cluster that every later step targets. `--wait 120s`
      # makes cluster creation wait up to 120s for the control plane.
      # NOTE(review): `kind@latest` is unpinned, so the kind version can change
      # between runs — consider pinning a release tag.
      - name: Setup Kind
        run: |
          go install sigs.k8s.io/kind@latest
          kind create cluster --name airunway-e2e --wait 120s

      # Install the KAITO workspace chart into its own namespace (created on
      # demand) with the `disableNodeAutoProvisioning` feature gate set to
      # true, then wait up to 120s for the operator Deployment to report
      # Available.
      - name: Install KAITO operator
        run: |
          helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito
          # If Gateway API Inference Extension CRDs are pre-installed (e.g. via kubectl apply),
          # add --skip-crds to avoid a field-manager conflict on InferencePool.
          helm install kaito-workspace kaito/workspace \
            --namespace kaito-workspace \
            --create-namespace \
            --set featureGates.disableNodeAutoProvisioning=true
          kubectl wait --for=condition=Available deployment -n kaito-workspace -l app.kubernetes.io/name=workspace --timeout=120s

      # Build the controller image locally, side-load it into the kind cluster,
      # deploy it, and wait (up to 120s) for its Deployment to become Available.
      - name: Build and deploy controller
        run: |
          # Single source of truth for the image tag used by build, load and deploy.
          controller_image="airunway-controller:e2e"

          make controller-docker-build CONTROLLER_IMG="${controller_image}"
          kind load docker-image "${controller_image}" --name airunway-e2e
          make controller-deploy CONTROLLER_IMG="${controller_image}"

          kubectl wait deployment \
            --namespace airunway-system \
            --selector control-plane=controller-manager \
            --for=condition=Available \
            --timeout=120s

      # Build the KAITO provider image from providers/kaito, side-load it into
      # the kind cluster, deploy it, and wait (up to 120s) for its Deployment
      # to become Available.
      - name: Build and deploy KAITO provider
        run: |
          # Single source of truth for the image tag used by build, load and deploy.
          provider_image="kaito-provider:e2e"

          make -C providers/kaito docker-build IMG="${provider_image}"
          kind load docker-image "${provider_image}" --name airunway-e2e
          make -C providers/kaito deploy IMG="${provider_image}"

          kubectl wait deployment \
            --namespace airunway-system \
            --selector control-plane=kaito-provider \
            --for=condition=Available \
            --timeout=120s

      # Block (up to 120s) until the `kaito` InferenceProviderConfig reports
      # `.status.ready` equal to true.
      # NOTE(review): `kubectl wait` on a named object fails immediately if the
      # object does not exist yet — confirm the InferenceProviderConfig is
      # guaranteed to exist by the time the provider Deployment is Available.
      - name: Wait for provider registration
        run: |
          kubectl wait --for=jsonpath='{.status.ready}'=true inferenceproviderconfig/kaito --timeout=120s

      # Apply the CPU-only ModelDeployment manifest kept with the controller's
      # e2e test data; later steps refer to it by the name `llama-cpu-e2e`.
      - name: Create CPU-only ModelDeployment
        run: |
          kubectl apply -f controller/test/e2e/testdata/cpu-modeldeployment.yaml

      # Two-stage wait: first a best-effort wait on the KAITO Workspace
      # condition, then a poll of the ModelDeployment phase (60 attempts,
      # 10s apart) that fails the step if "Running" is never observed.
      - name: Wait for ModelDeployment to reach Running phase
        run: |
          # Best-effort only: errors are hidden and never fail the step.
          kubectl wait --for=condition=WorkspaceSucceeded workspace/llama-cpu-e2e -n default --timeout=600s 2>/dev/null || true

          echo "Waiting for ModelDeployment to reach Running phase..."
          max_attempts=60
          attempt=1
          while [ "$attempt" -le "$max_attempts" ]; do
            # A failed lookup is treated as an empty phase rather than an error.
            current_phase=$(kubectl get modeldeployment llama-cpu-e2e -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
            echo "Attempt ${attempt}/${max_attempts}: phase=${current_phase}"

            # Not there yet: back off and try again.
            if [ "$current_phase" != "Running" ]; then
              sleep 10
              attempt=$((attempt + 1))
              continue
            fi

            echo "✅ ModelDeployment is Running"
            exit 0
          done

          echo "❌ Timed out waiting for ModelDeployment to reach Running phase"
          exit 1

      # Port-forward the model Service to localhost:8080 and send one chat
      # completion request. The step passes only if the JSON response contains
      # `.choices` and `.choices[0].message.content`.
      - name: Test inference endpoint
        run: |
          # Get the actual service port
          SVC_PORT=$(kubectl get svc llama-cpu-e2e -n default -o jsonpath='{.spec.ports[0].port}')
          echo "Service port: $SVC_PORT"

          # Run the port-forward in the background and make sure it is stopped
          # when the step exits, whether the checks below pass or fail.
          kubectl port-forward svc/llama-cpu-e2e "8080:${SVC_PORT}" -n default &
          PF_PID=$!
          trap 'kill "$PF_PID" 2>/dev/null || true' EXIT

          # Instead of sleeping for a fixed time and hoping the forward is up,
          # let curl retry while the local port still refuses connections
          # (and on transient HTTP errors). -S keeps curl's error message
          # visible even though -s silences the progress meter.
          RESPONSE=$(curl -sSf \
            --retry 10 --retry-delay 3 --retry-connrefused \
            http://localhost:8080/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3.2-1b-instruct",
              "messages": [{"role": "user", "content": "Say hello in one word."}],
              "max_tokens": 10
            }')

          echo "Response: $RESPONSE"

          # jq -e exits non-zero when the selected value is null or false,
          # which fails the step if the response has the wrong shape.
          echo "$RESPONSE" | jq -e '.choices' > /dev/null
          echo "$RESPONSE" | jq -e '.choices[0].message.content' > /dev/null

          echo "✅ Inference endpoint responded with valid chat completion"

      # Runs only after an earlier step has failed and dumps cluster state to
      # the job log to help diagnose the failure.
      - name: Collect debug info
        if: failure()
        run: |
          # Diagnostics are best-effort. The default run shell aborts on the
          # first failing command, so one failing kubectl call (for example a
          # resource type that was never installed because an early step
          # failed) would otherwise drop every section after it, including
          # events and pods. Disable errexit so each section is attempted.
          set +e

          echo "=== ModelDeployments ==="
          kubectl get modeldeployments -A -o yaml
          echo "=== InferenceProviderConfigs ==="
          kubectl get inferenceproviderconfigs -o yaml
          echo "=== Workspaces ==="
          kubectl get workspaces -A -o yaml
          echo "=== Controller Logs ==="
          kubectl logs -n airunway-system -l control-plane=controller-manager --tail=100
          echo "=== KAITO Provider Logs ==="
          kubectl logs -n airunway-system -l control-plane=kaito-provider --tail=100
          echo "=== Events ==="
          kubectl get events -A --sort-by=.lastTimestamp
          echo "=== Pods ==="
          kubectl get pods -A

      # Always runs, whatever the outcome of the earlier steps, and deletes
      # the `airunway-e2e` kind cluster created in "Setup Kind".
      - name: Cleanup
        if: always()
        run: |
          kind delete cluster --name airunway-e2e