Skip to content

test(e2e): add GPU end-to-end suite for dynamo, vllm, kaito #974

test(e2e): add GPU end-to-end suite for dynamo, vllm, kaito

test(e2e): add GPU end-to-end suite for dynamo, vllm, kaito #974

Workflow file for this run

# End-to-end gateway test workflow.
# Triggers: pushes and pull requests targeting main, plus manual dispatch.
name: E2E Gateway Tests
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:
# The job only needs to read repository contents.
permissions:
  contents: read
jobs:
  e2e-gateway:
    runs-on: ubuntu-latest-16-cores
    # Hard cap for the whole job, including all polling loops below.
    timeout-minutes: 45
    steps:
# Check out the repository; the action is pinned to a full commit SHA.
- name: Checkout repository
  uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
# Publish GAIE_VERSION from versions.env to later steps through $GITHUB_ENV.
- name: Load component versions
  run: |
    # Source versions.env with auto-export on, then switch auto-export off again.
    set -a
    source versions.env
    set +a
    # Fail fast: without this an unset value would be written as an empty string
    # and only show up later as a malformed download URL / chart version.
    : "${GAIE_VERSION:?GAIE_VERSION is not set in versions.env}"
    echo "GAIE_VERSION=${GAIE_VERSION}" >> "$GITHUB_ENV"
# Install Go 1.25; the module cache is keyed on the controller's go.sum.
- name: Setup Go
  uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c  # v6.4.0
  with:
    # Quoted so the version is read as a string, not a float.
    go-version: "1.25"
    cache-dependency-path: controller/go.sum
# Install Bun.
# NOTE(review): the Bun version floats on `latest` while the action itself is
# SHA-pinned; confirm that is intentional.
- name: Setup Bun
  uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6  # v2.2.0
  with:
    bun-version: latest
# Install kind with `go install` and create the airunway-gw-e2e cluster.
- name: Setup Kind
  run: |
    # NOTE(review): kind is installed at @latest, unlike cloud-provider-kind
    # which is pinned in the next step; confirm this is intentional.
    go install sigs.k8s.io/kind@latest
    CLUSTER=airunway-gw-e2e
    kind create cluster --name "${CLUSTER}" --wait 120s
    # Best effort: remove the exclude-from-external-load-balancers label from the
    # control-plane node so LoadBalancer traffic can reach it; errors are ignored.
    kubectl label node "${CLUSTER}-control-plane" node.kubernetes.io/exclude-from-external-load-balancers- 2>/dev/null || true
# Install cloud-provider-kind and leave it running in the background for the
# remainder of the job.
- name: Install cloud-provider-kind
  run: |
    # Pinned: cloud-provider-kind v0.11.0+ requires Go 1.26. This job runs
    # Go 1.25, and actions/setup-go exports GOTOOLCHAIN=local (which forbids
    # auto-downloading a newer toolchain), so `@latest` fails to install.
    # v0.10.0 is the newest release that builds on Go 1.25.
    go install sigs.k8s.io/cloud-provider-kind@v0.10.0
    # Send output to a file so it does not hold the step's log stream open and
    # can be printed if startup fails.
    cloud-provider-kind > /tmp/cloud-provider-kind.log 2>&1 &
    CPK_PID=$!
    sleep 5
    # Verify the process survived startup instead of reporting success blindly.
    if ! kill -0 "$CPK_PID" 2>/dev/null; then
      echo "❌ cloud-provider-kind exited during startup"
      cat /tmp/cloud-provider-kind.log || true
      exit 1
    fi
    echo "✅ cloud-provider-kind running"
# Apply the Gateway API standard-channel install manifest.
# NOTE(review): the URL resolves to whatever release is `latest` at run time.
- name: Install Gateway API CRDs
  run: |
    GATEWAY_API_MANIFEST=https://github.com/kubernetes-sigs/gateway-api/releases/latest/download/standard-install.yaml
    kubectl apply -f "${GATEWAY_API_MANIFEST}"
# Apply the Gateway API Inference Extension manifests for the release named by
# GAIE_VERSION (exported to the job environment by an earlier step).
- name: Install Gateway API Inference Extension CRDs
  run: |
    GAIE_MANIFEST="https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/${GAIE_VERSION}/manifests.yaml"
    kubectl apply -f "${GAIE_MANIFEST}"
# Download Istio, install the minimal profile with the inference-extension flag
# enabled on pilot, and wait for istiod to become Available.
- name: Install Istio with Inference Extension support
  run: |
    curl -L https://istio.io/downloadIstio | sh -
    # The download script unpacks into an istio-<version> directory.
    cd istio-*/bin
    ./istioctl install -y \
      --set profile=minimal \
      --set values.pilot.env.ENABLE_GATEWAY_API_INFERENCE_EXTENSION=true
    kubectl wait deployment/istiod -n istio-system \
      --for=condition=Available --timeout=120s
    echo "✅ Istio installed"
# Install the KAITO workspace chart with node auto-provisioning disabled and
# wait for its deployment to become Available.
- name: Install KAITO operator
  run: |
    helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito
    # If Gateway API Inference Extension CRDs are pre-installed (e.g. via kubectl apply),
    # add --skip-crds to avoid a field-manager conflict on InferencePool.
    # NOTE(review): an earlier step in this workflow does apply those CRDs with
    # kubectl, yet --skip-crds is not passed here; confirm this is intentional.
    helm install kaito-workspace kaito/workspace \
      --create-namespace \
      --namespace kaito-workspace \
      --set featureGates.disableNodeAutoProvisioning=true
    kubectl wait deployment -n kaito-workspace -l app.kubernetes.io/name=workspace \
      --for=condition=Available --timeout=120s
# Build the controller image, side-load it into the kind cluster, deploy it,
# and wait for the controller-manager deployment to become Available.
- name: Build and deploy controller
  run: |
    image_ref=airunway-controller:e2e
    make controller-docker-build CONTROLLER_IMG="${image_ref}"
    kind load docker-image "${image_ref}" --name airunway-gw-e2e
    make controller-deploy CONTROLLER_IMG="${image_ref}"
    kubectl wait deployment -n airunway-system -l control-plane=controller-manager \
      --for=condition=Available --timeout=120s
# Build the KAITO provider image, side-load it into the kind cluster, deploy it,
# and wait for the kaito-provider deployment to become Available.
- name: Build and deploy KAITO provider
  run: |
    image_ref=kaito-provider:e2e
    make -C providers/kaito docker-build IMG="${image_ref}"
    kind load docker-image "${image_ref}" --name airunway-gw-e2e
    make -C providers/kaito deploy IMG="${image_ref}"
    kubectl wait deployment -n airunway-system -l control-plane=kaito-provider \
      --for=condition=Available --timeout=120s
# Block until the `kaito` InferenceProviderConfig reports .status.ready == true.
- name: Wait for provider registration
  run: |
    kubectl wait inferenceproviderconfig/kaito \
      --for=jsonpath='{.status.ready}'=true \
      --timeout=120s
# Apply the test Gateway and poll (30 x 5s) for its Programmed condition.
# A timeout here is deliberately non-fatal: the step only prints a warning.
- name: Create Gateway resource
  run: |
    kubectl apply -f controller/test/e2e/testdata/gateway.yaml
    echo "Waiting for Gateway to be programmed..."
    attempt=1
    while [ "$attempt" -le 30 ]; do
      # Lookup errors are treated as "not programmed yet".
      state=$(kubectl get gateway inference-gateway -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' 2>/dev/null || echo "")
      if [ "$state" = "True" ]; then
        echo "✅ Gateway is programmed"
        break
      fi
      echo "Attempt $attempt/30: programmed=$state"
      if [ "$attempt" -eq 30 ]; then
        echo "⚠️ Gateway not programmed after 30 attempts, continuing anyway (Kind may not support LoadBalancer)"
      fi
      sleep 5
      attempt=$((attempt + 1))
    done
# Apply the gateway-enabled ModelDeployment fixture.
- name: Create ModelDeployment with gateway enabled
  run: |
    MANIFEST=controller/test/e2e/testdata/gateway-modeldeployment.yaml
    kubectl apply -f "${MANIFEST}"
# Best-effort wait on the Workspace, then poll (60 x 10s) until the
# ModelDeployment phase is Running; fail the step on timeout.
- name: Wait for ModelDeployment to reach Running phase
  run: |
    # Failure of this wait is ignored; the loop below is the real gate.
    kubectl wait --for=condition=WorkspaceSucceeded workspace/llama-gw-e2e -n default --timeout=600s 2>/dev/null || true
    echo "Waiting for ModelDeployment to reach Running phase..."
    attempt=0
    until [ "$attempt" -ge 60 ]; do
      attempt=$((attempt + 1))
      phase=$(kubectl get modeldeployment llama-gw-e2e -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
      echo "Attempt $attempt/60: phase=$phase"
      if [ "$phase" = "Running" ]; then
        echo "✅ ModelDeployment is Running"
        exit 0
      fi
      sleep 10
    done
    echo "❌ Timed out waiting for ModelDeployment to reach Running phase"
    exit 1
# Poll (30 x 5s) for the InferencePool, then assert its selector label and that
# an endpointPickerRef name is set.
- name: Verify InferencePool created
  run: |
    # Print one field of the InferencePool selected by a JSONPath expression.
    pool_field() {
      kubectl get inferencepool llama-gw-e2e -n default -o jsonpath="$1"
    }

    echo "Waiting for InferencePool..."
    found=false
    for attempt in $(seq 1 30); do
      if kubectl get inferencepool llama-gw-e2e -n default > /dev/null 2>&1; then
        echo "✅ InferencePool found"
        found=true
        break
      fi
      echo "Attempt $attempt/30: InferencePool not found yet"
      # No sleep after the final attempt; fall through to the timeout below.
      [ "$attempt" = "30" ] || sleep 5
    done
    if [ "$found" != "true" ]; then
      echo "❌ Timed out waiting for InferencePool"
      exit 1
    fi

    # Selector must target this ModelDeployment.
    selector=$(pool_field '{.spec.selector.matchLabels.airunway\.ai/model-deployment}')
    if [ "$selector" != "llama-gw-e2e" ]; then
      echo "❌ InferencePool selector mismatch: expected 'llama-gw-e2e', got '$selector'"
      exit 1
    fi
    echo "✅ InferencePool selector correct"

    # An endpoint picker must be referenced.
    epp_name=$(pool_field '{.spec.endpointPickerRef.name}')
    if [ -z "$epp_name" ]; then
      echo "❌ InferencePool missing endpointPickerRef"
      exit 1
    fi
    echo "✅ InferencePool endpointPickerRef set: $epp_name"
# Poll (30 x 5s) for the HTTPRoute, then assert its parent Gateway and that the
# first backendRef points at an InferencePool.
- name: Verify HTTPRoute created
  run: |
    # Print one field of the HTTPRoute selected by a JSONPath expression.
    route_field() {
      kubectl get httproute llama-gw-e2e -n default -o jsonpath="$1"
    }

    echo "Waiting for HTTPRoute..."
    found=false
    for attempt in $(seq 1 30); do
      if kubectl get httproute llama-gw-e2e -n default > /dev/null 2>&1; then
        echo "✅ HTTPRoute found"
        found=true
        break
      fi
      echo "Attempt $attempt/30: HTTPRoute not found yet"
      # No sleep after the final attempt; fall through to the timeout below.
      [ "$attempt" = "30" ] || sleep 5
    done
    if [ "$found" != "true" ]; then
      echo "❌ Timed out waiting for HTTPRoute"
      exit 1
    fi

    # The route must attach to the test Gateway.
    parent=$(route_field '{.spec.parentRefs[0].name}')
    if [ "$parent" != "inference-gateway" ]; then
      echo "❌ HTTPRoute parent mismatch: expected 'inference-gateway', got '$parent'"
      exit 1
    fi
    echo "✅ HTTPRoute parent ref correct"

    # The first backend of the first rule must be an InferencePool.
    backend_group=$(route_field '{.spec.rules[0].backendRefs[0].group}')
    backend_kind=$(route_field '{.spec.rules[0].backendRefs[0].kind}')
    if [ "$backend_group" = "inference.networking.k8s.io" ] && [ "$backend_kind" = "InferencePool" ]; then
      echo "✅ HTTPRoute backend ref correct"
    else
      echo "❌ HTTPRoute backend ref mismatch: group=$backend_group kind=$backend_kind"
      exit 1
    fi
# Poll (30 x 5s) for GatewayReady=True on the ModelDeployment, then require a
# non-empty .status.gateway.modelName.
- name: Verify gateway status and model name auto-discovery
  run: |
    echo "Waiting for GatewayReady condition..."
    ready=false
    for attempt in $(seq 1 30); do
      # Lookup errors are treated as "condition not set yet".
      gw_status=$(kubectl get modeldeployment llama-gw-e2e -n default \
        -o jsonpath='{.status.conditions[?(@.type=="GatewayReady")].status}' 2>/dev/null || echo "")
      if [ "$gw_status" = "True" ]; then
        echo "✅ GatewayReady condition is True"
        ready=true
        break
      fi
      echo "Attempt $attempt/30: GatewayReady=$gw_status"
      # No sleep after the final attempt; fall through to the timeout below.
      [ "$attempt" = "30" ] || sleep 5
    done
    if [ "$ready" != "true" ]; then
      echo "❌ Timed out waiting for GatewayReady condition"
      exit 1
    fi

    # The controller is expected to have filled in the model name by now.
    model_name=$(kubectl get modeldeployment llama-gw-e2e -n default \
      -o jsonpath='{.status.gateway.modelName}')
    if [ -n "$model_name" ]; then
      echo "✅ Gateway model name auto-discovered: $model_name"
    else
      echo "❌ Gateway model name is empty"
      exit 1
    fi
# Poll (30 x 10s) until the llama-gw-e2e-epp deployment reports exactly one
# ready replica; fail the step on timeout.
- name: Wait for EPP to be ready
  run: |
    echo "Waiting for EPP deployment..."
    epp_up=false
    for attempt in $(seq 1 30); do
      # A failed lookup is reported as 0 ready replicas.
      ready_count=$(kubectl get deployment llama-gw-e2e-epp -n default -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")
      if [ "$ready_count" = "1" ]; then
        echo "✅ EPP is ready"
        epp_up=true
        break
      fi
      echo "Attempt $attempt/30: EPP readyReplicas=$ready_count"
      # No sleep after the final attempt; fall through to the failure below.
      [ "$attempt" = "30" ] || sleep 10
    done
    if [ "$epp_up" != "true" ]; then
      echo "❌ EPP not ready"
      exit 1
    fi
# Install the body-based-routing chart at GAIE_VERSION with the Istio provider
# and wait for the release to become ready.
- name: Install Body-Based Router (BBR)
  run: |
    BBR_CHART=oci://registry.k8s.io/gateway-api-inference-extension/charts/body-based-routing
    helm install body-based-router "${BBR_CHART}" \
      --version "${GAIE_VERSION}" \
      --set provider.name=istio \
      --wait --timeout 120s
    echo "✅ BBR installed"
# Send a chat-completion request through the Gateway address and require an
# HTTP 200 whose JSON body contains .choices[0].message.content.
# Retries 18 times, 10s apart, before failing the step.
- name: Test inference through gateway
  run: |
    MODEL_NAME=$(kubectl get modeldeployment llama-gw-e2e -n default \
      -o jsonpath='{.status.gateway.modelName}')
    echo "Model name: $MODEL_NAME"
    # Build the request body with jq so the model name is always JSON-escaped.
    PAYLOAD=$(jq -n --arg model "$MODEL_NAME" \
      '{model: $model, messages: [{role: "user", content: "Say hello in one word."}], max_tokens: 10}')
    # Get the Gateway LoadBalancer IP (provided by cloud-provider-kind)
    GW_IP=""
    for i in $(seq 1 30); do
      GW_IP=$(kubectl get gateway inference-gateway -o jsonpath='{.status.addresses[0].value}' 2>/dev/null || echo "")
      if [ -n "$GW_IP" ]; then
        echo "Gateway IP: $GW_IP"
        break
      fi
      echo "Waiting for Gateway IP... attempt $i/30"
      sleep 5
    done
    if [ -z "$GW_IP" ]; then
      echo "❌ Gateway IP not assigned"
      exit 1
    fi
    echo "Sending inference request through gateway at http://${GW_IP}..."
    for i in $(seq 1 18); do
      # Drop the previous attempt's body so a connection failure is not logged
      # alongside a stale response.
      rm -f /tmp/response.json
      # -w prints only the status code ("000" if the request never completed);
      # `|| true` keeps a curl failure from aborting the retry loop under `bash -e`.
      HTTP_CODE=$(curl -s -o /tmp/response.json -w '%{http_code}' --max-time 30 \
        "http://${GW_IP}/v1/chat/completions" \
        -H "Content-Type: application/json" \
        -d "$PAYLOAD" || true)
      RESPONSE=$(cat /tmp/response.json 2>/dev/null || echo "")
      if [ "$HTTP_CODE" = "200" ] && printf '%s' "$RESPONSE" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then
        echo "Response: $RESPONSE"
        echo "✅ Inference through gateway succeeded"
        exit 0
      fi
      # Quoted so the body is not word-split or glob-expanded before truncation.
      echo "Attempt $i/18: HTTP=$HTTP_CODE body=$(printf '%s' "$RESPONSE" | head -c 200)"
      sleep 10
    done
    echo "❌ Inference through gateway failed"
    exit 1
# Disable the gateway on the ModelDeployment and verify that the InferencePool
# and HTTPRoute are removed and GatewayReady turns False.
# Each check polls (30 x 5s) rather than relying on one fixed sleep, matching
# the other verification steps in this workflow.
- name: Test gateway disable and cleanup
  run: |
    # Disable gateway
    kubectl patch modeldeployment llama-gw-e2e -n default \
      --type=merge -p '{"spec":{"gateway":{"enabled":false}}}'
    echo "Waiting for gateway resources to be cleaned up..."

    # Return 0 once `kubectl get <kind> llama-gw-e2e` stops succeeding,
    # or 1 if the object is still present after the last attempt.
    wait_until_gone() {
      local kind="$1"
      local attempt
      for attempt in $(seq 1 30); do
        if ! kubectl get "$kind" llama-gw-e2e -n default > /dev/null 2>&1; then
          return 0
        fi
        [ "$attempt" = "30" ] || sleep 5
      done
      return 1
    }

    # Verify InferencePool deleted
    if ! wait_until_gone inferencepool; then
      echo "❌ InferencePool should have been deleted"
      exit 1
    fi
    echo "✅ InferencePool cleaned up"

    # Verify HTTPRoute deleted
    if ! wait_until_gone httproute; then
      echo "❌ HTTPRoute should have been deleted"
      exit 1
    fi
    echo "✅ HTTPRoute cleaned up"

    # Verify GatewayReady condition is False
    GW_READY=""
    for attempt in $(seq 1 30); do
      GW_READY=$(kubectl get modeldeployment llama-gw-e2e -n default \
        -o jsonpath='{.status.conditions[?(@.type=="GatewayReady")].status}' 2>/dev/null || echo "")
      if [ "$GW_READY" = "False" ]; then
        break
      fi
      [ "$attempt" = "30" ] || sleep 5
    done
    if [ "$GW_READY" != "False" ]; then
      echo "❌ GatewayReady condition should be False after disable: $GW_READY"
      exit 1
    fi
    echo "✅ GatewayReady condition is False after disable"
# On failure, dump cluster state and component logs for diagnosis.
- name: Collect debug info
  if: failure()
  run: |
    # The default shell is `bash -e`: without this, the first kubectl that fails
    # (e.g. a CRD that was never installed because an early step broke) would
    # abort the step and discard every remaining diagnostic.
    set +e
    echo "=== ModelDeployments ==="
    kubectl get modeldeployments -A -o yaml
    echo "=== InferencePools ==="
    kubectl get inferencepools -A -o yaml 2>/dev/null || echo "No InferencePools"
    echo "=== HTTPRoutes ==="
    kubectl get httproutes -A -o yaml 2>/dev/null || echo "No HTTPRoutes"
    echo "=== Gateways ==="
    kubectl get gateways -A -o yaml 2>/dev/null || echo "No Gateways"
    echo "=== Workspaces ==="
    kubectl get workspaces -A -o yaml
    echo "=== Controller Logs ==="
    kubectl logs -n airunway-system -l control-plane=controller-manager --tail=200
    echo "=== KAITO Provider Logs ==="
    kubectl logs -n airunway-system -l control-plane=kaito-provider --tail=100
    echo "=== EPP Logs ==="
    kubectl logs -n default -l app.kubernetes.io/name=llama-gw-e2e-epp --tail=100 2>/dev/null || echo "No EPP logs"
    echo "=== Istio Logs ==="
    kubectl logs -n istio-system -l app=istiod --tail=100 2>/dev/null || echo "No Istio logs"
    echo "=== Gateway Proxy Logs ==="
    GW_POD=$(kubectl get pods -n default -l gateway.networking.k8s.io/gateway-name=inference-gateway -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
    # Explicit if/else instead of `A && B || C`, so the fallback message is
    # tied to exactly these two cases: no pod found, or log retrieval failed.
    if [ -n "$GW_POD" ] && kubectl logs "$GW_POD" -n default --tail=50 2>/dev/null; then
      :
    else
      echo "No gateway proxy logs"
    fi
    echo "=== Gateway Pods ==="
    kubectl get pods -n default -l gateway.networking.k8s.io/gateway-name=inference-gateway -o yaml
    echo "=== Events ==="
    kubectl get events -A --sort-by=.lastTimestamp
    echo "=== Pods ==="
    kubectl get pods -A
    # Diagnostics are best effort; do not let the last command's status fail the step.
    exit 0
# Always delete the kind cluster, whether earlier steps passed or failed.
- name: Cleanup
  if: always()
  run: |
    CLUSTER=airunway-gw-e2e
    kind delete cluster --name "${CLUSTER}"