# End-to-end gateway test workflow: a single job whose steps run in order below.
name: E2E Gateway Tests

on:
  # Pushes to and pull requests against main.
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  # Manual trigger.
  workflow_dispatch:

jobs:
  e2e-gateway:
    runs-on: ubuntu-latest-16-cores
    # Upper bound for the whole job.
    timeout-minutes: 45

    steps:
| 16 | + - name: Checkout repository |
| 17 | + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v4 |
| 18 | + |
| 19 | + - name: Setup Go |
| 20 | + uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5 |
| 21 | + with: |
| 22 | + go-version: "1.25" |
| 23 | + cache-dependency-path: controller/go.sum |
| 24 | + |
| 25 | + - name: Setup Kind |
| 26 | + run: | |
| 27 | + go install sigs.k8s.io/kind@latest |
| 28 | + kind create cluster --name kubeairunway-gw-e2e --wait 120s |
| 29 | + # Allow workloads on control plane node for LoadBalancer access |
| 30 | + kubectl label node kubeairunway-gw-e2e-control-plane node.kubernetes.io/exclude-from-external-load-balancers- 2>/dev/null || true |
| 31 | +
|
| 32 | + - name: Install cloud-provider-kind |
| 33 | + run: | |
| 34 | + go install sigs.k8s.io/cloud-provider-kind@latest |
| 35 | + cloud-provider-kind & |
| 36 | + sleep 5 |
| 37 | + echo "✅ cloud-provider-kind running" |
| 38 | +
|
| 39 | + - name: Install Gateway API CRDs |
| 40 | + run: | |
| 41 | + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/latest/download/standard-install.yaml |
| 42 | +
|
| 43 | + - name: Install Gateway API Inference Extension CRDs |
| 44 | + run: | |
| 45 | + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.3.1/manifests.yaml |
| 46 | +
|
| 47 | + - name: Install Istio with Inference Extension support |
| 48 | + run: | |
| 49 | + curl -L https://istio.io/downloadIstio | sh - |
| 50 | + cd istio-*/bin |
| 51 | + ./istioctl install --set profile=minimal \ |
| 52 | + --set values.pilot.env.ENABLE_GATEWAY_API_INFERENCE_EXTENSION=true -y |
| 53 | + kubectl wait --for=condition=Available deployment/istiod -n istio-system --timeout=120s |
| 54 | + echo "✅ Istio installed" |
| 55 | +
|
| 56 | + - name: Install KAITO operator |
| 57 | + run: | |
| 58 | + helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito |
| 59 | + helm install kaito-workspace kaito/workspace \ |
| 60 | + --namespace kaito-workspace \ |
| 61 | + --create-namespace \ |
| 62 | + --set featureGates.disableNodeAutoProvisioning=true |
| 63 | + kubectl wait --for=condition=Available deployment -n kaito-workspace -l app.kubernetes.io/name=workspace --timeout=120s |
| 64 | +
|
| 65 | + - name: Build and deploy controller |
| 66 | + run: | |
| 67 | + make controller-docker-build CONTROLLER_IMG=kubeairunway-controller:e2e |
| 68 | + kind load docker-image kubeairunway-controller:e2e --name kubeairunway-gw-e2e |
| 69 | + make controller-deploy CONTROLLER_IMG=kubeairunway-controller:e2e |
| 70 | + kubectl wait --for=condition=Available deployment -n kubeairunway-system -l control-plane=controller-manager --timeout=120s |
| 71 | +
|
| 72 | + - name: Build and deploy KAITO provider |
| 73 | + run: | |
| 74 | + make kaito-provider-docker-build KAITO_PROVIDER_IMG=kaito-provider:e2e |
| 75 | + kind load docker-image kaito-provider:e2e --name kubeairunway-gw-e2e |
| 76 | + make kaito-provider-deploy KAITO_PROVIDER_IMG=kaito-provider:e2e |
| 77 | + kubectl wait --for=condition=Available deployment -n kubeairunway-system -l control-plane=kaito-provider --timeout=120s |
| 78 | +
|
| 79 | + - name: Wait for provider registration |
| 80 | + run: | |
| 81 | + kubectl wait --for=jsonpath='{.status.ready}'=true inferenceproviderconfig/kaito --timeout=120s |
| 82 | +
|
| 83 | + - name: Create Gateway resource |
| 84 | + run: | |
| 85 | + kubectl apply -f controller/test/e2e/testdata/gateway.yaml |
| 86 | + echo "Waiting for Gateway to be programmed..." |
| 87 | + for i in $(seq 1 30); do |
| 88 | + PROGRAMMED=$(kubectl get gateway inference-gateway -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' 2>/dev/null || echo "") |
| 89 | + if [ "$PROGRAMMED" = "True" ]; then |
| 90 | + echo "✅ Gateway is programmed" |
| 91 | + break |
| 92 | + fi |
| 93 | + echo "Attempt $i/30: programmed=$PROGRAMMED" |
| 94 | + if [ "$i" = "30" ]; then |
| 95 | + echo "⚠️ Gateway not programmed after 30 attempts, continuing anyway (Kind may not support LoadBalancer)" |
| 96 | + fi |
| 97 | + sleep 5 |
| 98 | + done |
| 99 | +
|
| 100 | + - name: Create ModelDeployment with gateway enabled |
| 101 | + run: | |
| 102 | + kubectl apply -f controller/test/e2e/testdata/gateway-modeldeployment.yaml |
| 103 | +
|
| 104 | + - name: Wait for ModelDeployment to reach Running phase |
| 105 | + run: | |
| 106 | + kubectl wait --for=condition=WorkspaceSucceeded workspace/llama-gw-e2e -n default --timeout=600s 2>/dev/null || true |
| 107 | +
|
| 108 | + echo "Waiting for ModelDeployment to reach Running phase..." |
| 109 | + for i in $(seq 1 60); do |
| 110 | + PHASE=$(kubectl get modeldeployment llama-gw-e2e -o jsonpath='{.status.phase}' 2>/dev/null || echo "") |
| 111 | + echo "Attempt $i/60: phase=$PHASE" |
| 112 | + if [ "$PHASE" = "Running" ]; then |
| 113 | + echo "✅ ModelDeployment is Running" |
| 114 | + exit 0 |
| 115 | + fi |
| 116 | + sleep 10 |
| 117 | + done |
| 118 | + echo "❌ Timed out waiting for ModelDeployment to reach Running phase" |
| 119 | + exit 1 |
| 120 | +
|
| 121 | + - name: Verify InferencePool created |
| 122 | + run: | |
| 123 | + echo "Waiting for InferencePool..." |
| 124 | + for i in $(seq 1 30); do |
| 125 | + if kubectl get inferencepool llama-gw-e2e -n default > /dev/null 2>&1; then |
| 126 | + echo "✅ InferencePool found" |
| 127 | + break |
| 128 | + fi |
| 129 | + echo "Attempt $i/30: InferencePool not found yet" |
| 130 | + if [ "$i" = "30" ]; then |
| 131 | + echo "❌ Timed out waiting for InferencePool" |
| 132 | + exit 1 |
| 133 | + fi |
| 134 | + sleep 5 |
| 135 | + done |
| 136 | +
|
| 137 | + # Verify selector label |
| 138 | + SELECTOR=$(kubectl get inferencepool llama-gw-e2e -n default \ |
| 139 | + -o jsonpath='{.spec.selector.matchLabels.kubeairunway\.ai/model-deployment}') |
| 140 | + if [ "$SELECTOR" != "llama-gw-e2e" ]; then |
| 141 | + echo "❌ InferencePool selector mismatch: expected 'llama-gw-e2e', got '$SELECTOR'" |
| 142 | + exit 1 |
| 143 | + fi |
| 144 | + echo "✅ InferencePool selector correct" |
| 145 | +
|
| 146 | + # Verify endpointPickerRef |
| 147 | + EPP_NAME=$(kubectl get inferencepool llama-gw-e2e -n default \ |
| 148 | + -o jsonpath='{.spec.endpointPickerRef.name}') |
| 149 | + if [ -z "$EPP_NAME" ]; then |
| 150 | + echo "❌ InferencePool missing endpointPickerRef" |
| 151 | + exit 1 |
| 152 | + fi |
| 153 | + echo "✅ InferencePool endpointPickerRef set: $EPP_NAME" |
| 154 | +
|
| 155 | + - name: Verify HTTPRoute created |
| 156 | + run: | |
| 157 | + echo "Waiting for HTTPRoute..." |
| 158 | + for i in $(seq 1 30); do |
| 159 | + if kubectl get httproute llama-gw-e2e -n default > /dev/null 2>&1; then |
| 160 | + echo "✅ HTTPRoute found" |
| 161 | + break |
| 162 | + fi |
| 163 | + echo "Attempt $i/30: HTTPRoute not found yet" |
| 164 | + if [ "$i" = "30" ]; then |
| 165 | + echo "❌ Timed out waiting for HTTPRoute" |
| 166 | + exit 1 |
| 167 | + fi |
| 168 | + sleep 5 |
| 169 | + done |
| 170 | +
|
| 171 | + # Verify parent ref points to gateway |
| 172 | + PARENT=$(kubectl get httproute llama-gw-e2e -n default \ |
| 173 | + -o jsonpath='{.spec.parentRefs[0].name}') |
| 174 | + if [ "$PARENT" != "inference-gateway" ]; then |
| 175 | + echo "❌ HTTPRoute parent mismatch: expected 'inference-gateway', got '$PARENT'" |
| 176 | + exit 1 |
| 177 | + fi |
| 178 | + echo "✅ HTTPRoute parent ref correct" |
| 179 | +
|
| 180 | + # Verify backend ref points to InferencePool |
| 181 | + BACKEND_GROUP=$(kubectl get httproute llama-gw-e2e -n default \ |
| 182 | + -o jsonpath='{.spec.rules[0].backendRefs[0].group}') |
| 183 | + BACKEND_KIND=$(kubectl get httproute llama-gw-e2e -n default \ |
| 184 | + -o jsonpath='{.spec.rules[0].backendRefs[0].kind}') |
| 185 | + if [ "$BACKEND_GROUP" != "inference.networking.k8s.io" ] || [ "$BACKEND_KIND" != "InferencePool" ]; then |
| 186 | + echo "❌ HTTPRoute backend ref mismatch: group=$BACKEND_GROUP kind=$BACKEND_KIND" |
| 187 | + exit 1 |
| 188 | + fi |
| 189 | + echo "✅ HTTPRoute backend ref correct" |
| 190 | +
|
| 191 | + - name: Verify gateway status and model name auto-discovery |
| 192 | + run: | |
| 193 | + echo "Waiting for GatewayReady condition..." |
| 194 | + for i in $(seq 1 30); do |
| 195 | + GW_READY=$(kubectl get modeldeployment llama-gw-e2e -n default \ |
| 196 | + -o jsonpath='{.status.conditions[?(@.type=="GatewayReady")].status}' 2>/dev/null || echo "") |
| 197 | + if [ "$GW_READY" = "True" ]; then |
| 198 | + echo "✅ GatewayReady condition is True" |
| 199 | + break |
| 200 | + fi |
| 201 | + echo "Attempt $i/30: GatewayReady=$GW_READY" |
| 202 | + if [ "$i" = "30" ]; then |
| 203 | + echo "❌ Timed out waiting for GatewayReady condition" |
| 204 | + exit 1 |
| 205 | + fi |
| 206 | + sleep 5 |
| 207 | + done |
| 208 | +
|
| 209 | + # Check auto-discovered model name |
| 210 | + MODEL_NAME=$(kubectl get modeldeployment llama-gw-e2e -n default \ |
| 211 | + -o jsonpath='{.status.gateway.modelName}') |
| 212 | + if [ -z "$MODEL_NAME" ]; then |
| 213 | + echo "❌ Gateway model name is empty" |
| 214 | + exit 1 |
| 215 | + fi |
| 216 | + echo "✅ Gateway model name auto-discovered: $MODEL_NAME" |
| 217 | +
|
| 218 | + - name: Wait for EPP to be ready |
| 219 | + run: | |
| 220 | + echo "Waiting for EPP deployment..." |
| 221 | + for i in $(seq 1 30); do |
| 222 | + READY=$(kubectl get deployment llama-gw-e2e-epp -n default -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") |
| 223 | + if [ "$READY" = "1" ]; then |
| 224 | + echo "✅ EPP is ready" |
| 225 | + break |
| 226 | + fi |
| 227 | + echo "Attempt $i/30: EPP readyReplicas=$READY" |
| 228 | + if [ "$i" = "30" ]; then |
| 229 | + echo "❌ EPP not ready" |
| 230 | + exit 1 |
| 231 | + fi |
| 232 | + sleep 10 |
| 233 | + done |
| 234 | +
|
| 235 | + - name: Configure Istio DestinationRule for EPP |
| 236 | + run: | |
| 237 | + kubectl apply -f - <<'DREOF' |
| 238 | + apiVersion: networking.istio.io/v1beta1 |
| 239 | + kind: DestinationRule |
| 240 | + metadata: |
| 241 | + name: llama-gw-e2e-epp |
| 242 | + namespace: default |
| 243 | + spec: |
| 244 | + host: llama-gw-e2e-epp.default.svc.cluster.local |
| 245 | + trafficPolicy: |
| 246 | + tls: |
| 247 | + mode: SIMPLE |
| 248 | + insecureSkipVerify: true |
| 249 | + DREOF |
| 250 | + echo "✅ Istio DestinationRule created for EPP" |
| 251 | +
|
| 252 | + - name: Install Body-Based Router (BBR) |
| 253 | + run: | |
| 254 | + helm install body-based-router \ |
| 255 | + --set provider.name=istio \ |
| 256 | + --version v1.3.1 \ |
| 257 | + oci://registry.k8s.io/gateway-api-inference-extension/charts/body-based-routing \ |
| 258 | + --wait --timeout 120s |
| 259 | + echo "✅ BBR installed" |
| 260 | +
|
| 261 | + - name: Test inference through gateway |
| 262 | + run: | |
| 263 | + MODEL_NAME=$(kubectl get modeldeployment llama-gw-e2e -n default \ |
| 264 | + -o jsonpath='{.status.gateway.modelName}') |
| 265 | + echo "Model name: $MODEL_NAME" |
| 266 | +
|
| 267 | + # Get the Gateway LoadBalancer IP (provided by cloud-provider-kind) |
| 268 | + GW_IP="" |
| 269 | + for i in $(seq 1 30); do |
| 270 | + GW_IP=$(kubectl get gateway inference-gateway -o jsonpath='{.status.addresses[0].value}' 2>/dev/null || echo "") |
| 271 | + if [ -n "$GW_IP" ]; then |
| 272 | + echo "Gateway IP: $GW_IP" |
| 273 | + break |
| 274 | + fi |
| 275 | + echo "Waiting for Gateway IP... attempt $i/30" |
| 276 | + sleep 5 |
| 277 | + done |
| 278 | +
|
| 279 | + if [ -z "$GW_IP" ]; then |
| 280 | + echo "❌ Gateway IP not assigned" |
| 281 | + exit 1 |
| 282 | + fi |
| 283 | +
|
| 284 | + echo "Sending inference request through gateway at http://${GW_IP}..." |
| 285 | + for i in $(seq 1 18); do |
| 286 | + HTTP_CODE=$(curl -s -o /tmp/response.json -w '%{http_code}' --max-time 30 \ |
| 287 | + http://${GW_IP}/v1/chat/completions \ |
| 288 | + -H "Content-Type: application/json" \ |
| 289 | + -d "{ |
| 290 | + \"model\": \"$MODEL_NAME\", |
| 291 | + \"messages\": [{\"role\": \"user\", \"content\": \"Say hello in one word.\"}], |
| 292 | + \"max_tokens\": 10 |
| 293 | + }" 2>&1 || true) |
| 294 | + RESPONSE=$(cat /tmp/response.json 2>/dev/null || echo "") |
| 295 | +
|
| 296 | + if [ "$HTTP_CODE" = "200" ] && echo "$RESPONSE" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then |
| 297 | + echo "Response: $RESPONSE" |
| 298 | + echo "✅ Inference through gateway succeeded" |
| 299 | + exit 0 |
| 300 | + fi |
| 301 | + echo "Attempt $i/18: HTTP=$HTTP_CODE body=$(echo $RESPONSE | head -c 200)" |
| 302 | + sleep 10 |
| 303 | + done |
| 304 | + echo "❌ Inference through gateway failed" |
| 305 | + exit 1 |
| 306 | +
|
| 307 | + - name: Test gateway disable and cleanup |
| 308 | + run: | |
| 309 | + # Disable gateway |
| 310 | + kubectl patch modeldeployment llama-gw-e2e -n default \ |
| 311 | + --type=merge -p '{"spec":{"gateway":{"enabled":false}}}' |
| 312 | +
|
| 313 | + echo "Waiting for gateway resources to be cleaned up..." |
| 314 | + sleep 15 |
| 315 | +
|
| 316 | + # Verify InferencePool deleted |
| 317 | + if kubectl get inferencepool llama-gw-e2e -n default 2>/dev/null; then |
| 318 | + echo "❌ InferencePool should have been deleted" |
| 319 | + exit 1 |
| 320 | + fi |
| 321 | + echo "✅ InferencePool cleaned up" |
| 322 | +
|
| 323 | + # Verify HTTPRoute deleted |
| 324 | + if kubectl get httproute llama-gw-e2e -n default 2>/dev/null; then |
| 325 | + echo "❌ HTTPRoute should have been deleted" |
| 326 | + exit 1 |
| 327 | + fi |
| 328 | + echo "✅ HTTPRoute cleaned up" |
| 329 | +
|
| 330 | + # Verify GatewayReady condition is False |
| 331 | + GW_READY=$(kubectl get modeldeployment llama-gw-e2e -n default \ |
| 332 | + -o jsonpath='{.status.conditions[?(@.type=="GatewayReady")].status}') |
| 333 | + if [ "$GW_READY" != "False" ]; then |
| 334 | + echo "❌ GatewayReady condition should be False after disable: $GW_READY" |
| 335 | + exit 1 |
| 336 | + fi |
| 337 | + echo "✅ GatewayReady condition is False after disable" |
| 338 | +
|
| 339 | + - name: Collect debug info |
| 340 | + if: failure() |
| 341 | + run: | |
| 342 | + echo "=== ModelDeployments ===" |
| 343 | + kubectl get modeldeployments -A -o yaml |
| 344 | + echo "=== InferencePools ===" |
| 345 | + kubectl get inferencepools -A -o yaml 2>/dev/null || echo "No InferencePools" |
| 346 | + echo "=== HTTPRoutes ===" |
| 347 | + kubectl get httproutes -A -o yaml 2>/dev/null || echo "No HTTPRoutes" |
| 348 | + echo "=== Gateways ===" |
| 349 | + kubectl get gateways -A -o yaml 2>/dev/null || echo "No Gateways" |
| 350 | + echo "=== Workspaces ===" |
| 351 | + kubectl get workspaces -A -o yaml |
| 352 | + echo "=== Controller Logs ===" |
| 353 | + kubectl logs -n kubeairunway-system -l control-plane=controller-manager --tail=200 |
| 354 | + echo "=== KAITO Provider Logs ===" |
| 355 | + kubectl logs -n kubeairunway-system -l control-plane=kaito-provider --tail=100 |
| 356 | + echo "=== EPP Logs ===" |
| 357 | + kubectl logs -n default -l app.kubernetes.io/name=llama-gw-e2e-epp --tail=100 2>/dev/null || echo "No EPP logs" |
| 358 | + echo "=== Istio Logs ===" |
| 359 | + kubectl logs -n istio-system -l app=istiod --tail=100 2>/dev/null || echo "No Istio logs" |
| 360 | + echo "=== Gateway Proxy Logs ===" |
| 361 | + GW_POD=$(kubectl get pods -n default -l gateway.networking.k8s.io/gateway-name=inference-gateway -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") |
| 362 | + [ -n "$GW_POD" ] && kubectl logs "$GW_POD" -n default --tail=50 2>/dev/null || echo "No gateway proxy logs" |
| 363 | + echo "=== Gateway Pods ===" |
| 364 | + kubectl get pods -n default -l gateway.networking.k8s.io/gateway-name=inference-gateway -o yaml |
| 365 | + echo "=== Events ===" |
| 366 | + kubectl get events -A --sort-by=.lastTimestamp |
| 367 | + echo "=== Pods ===" |
| 368 | + kubectl get pods -A |
| 369 | +
|
| 370 | + - name: Cleanup |
| 371 | + if: always() |
| 372 | + run: | |
| 373 | + kind delete cluster --name kubeairunway-gw-e2e |
0 commit comments