Skip to content

Commit 74855c5

Browse files
authored
Merge branch 'main' into newprovider
2 parents 92cb8c7 + 06d7fa7 commit 74855c5

45 files changed

Lines changed: 3533 additions & 469 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/e2e-gateway.yml

Lines changed: 373 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
name: E2E Gateway Tests
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
branches: [main]
8+
workflow_dispatch:
9+
10+
jobs:
11+
e2e-gateway:
12+
runs-on: ubuntu-latest-16-cores
13+
timeout-minutes: 45
14+
15+
steps:
16+
- name: Checkout repository
17+
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v4
18+
19+
- name: Setup Go
20+
uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5
21+
with:
22+
go-version: "1.25"
23+
cache-dependency-path: controller/go.sum
24+
25+
- name: Setup Kind
26+
run: |
27+
# Install the kind CLI with the Go toolchain. NOTE(review): @latest is unpinned,
# so the kind version can differ between runs.
go install sigs.k8s.io/kind@latest
28+
# Create the cluster that every later step targets by name (kubeairunway-gw-e2e).
kind create cluster --name kubeairunway-gw-e2e --wait 120s
29+
# Remove the exclude-from-external-load-balancers label from the control-plane
# node (a trailing '-' on a label key deletes that label) — presumably so
# LoadBalancer services can use this node as a backend. Errors are hidden and
# ignored (2>/dev/null || true), so a missing label does not fail the step.
30+
kubectl label node kubeairunway-gw-e2e-control-plane node.kubernetes.io/exclude-from-external-load-balancers- 2>/dev/null || true
31+
32+
- name: Install cloud-provider-kind
33+
run: |
34+
# Install cloud-provider-kind and leave it running in the background so that
# LoadBalancer services in the Kind cluster can be assigned an address.
go install sigs.k8s.io/cloud-provider-kind@latest
cloud-provider-kind &
cpk_pid=$!
# Give the process a moment to start, then confirm it is still alive. Without
# this check the step would report success even if the binary exited
# immediately, and the failure would only surface much later as a Gateway
# that never receives an IP.
sleep 5
if ! kill -0 "$cpk_pid" 2>/dev/null; then
  echo "❌ cloud-provider-kind exited during startup"
  exit 1
fi
echo "✅ cloud-provider-kind running"
38+
39+
- name: Install Gateway API CRDs
40+
run: |
41+
# Apply the Gateway API "standard" channel install manifest.
# NOTE(review): the URL uses releases/latest, so the CRD version is unpinned
# and can change between runs.
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/latest/download/standard-install.yaml
42+
43+
- name: Install Gateway API Inference Extension CRDs
44+
run: |
45+
# Apply the Gateway API Inference Extension manifests from the pinned v1.3.1 release.
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.3.1/manifests.yaml
46+
47+
- name: Install Istio with Inference Extension support
48+
run: |
49+
# Download Istio, install the minimal profile with the Gateway API Inference
# Extension enabled in pilot, and wait for istiod to become Available.

# The step shell stops on a failing command but not on a failing pipeline
# stage; pipefail plus curl -f make a failed or HTTP-error download abort the
# step instead of piping an empty or error body into sh.
set -o pipefail
curl -fsSL https://istio.io/downloadIstio | sh -

# Resolve the extracted directory explicitly rather than cd-ing into a glob,
# which breaks when it matches zero or several paths.
istio_dirs=(istio-*/)
if [ "${#istio_dirs[@]}" -ne 1 ] || [ ! -x "${istio_dirs[0]}bin/istioctl" ]; then
  echo "❌ Expected exactly one extracted istio-* directory with bin/istioctl, found: ${istio_dirs[*]}"
  exit 1
fi

"${istio_dirs[0]}bin/istioctl" install --set profile=minimal \
  --set values.pilot.env.ENABLE_GATEWAY_API_INFERENCE_EXTENSION=true -y
kubectl wait --for=condition=Available deployment/istiod -n istio-system --timeout=120s
echo "✅ Istio installed"
55+
56+
- name: Install KAITO operator
57+
run: |
58+
# Register the KAITO chart repository, then install the workspace chart into
# its own namespace (created on demand) with the
# featureGates.disableNodeAutoProvisioning value set to true.
helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito
59+
helm install kaito-workspace kaito/workspace \
60+
--namespace kaito-workspace \
61+
--create-namespace \
62+
--set featureGates.disableNodeAutoProvisioning=true
63+
# Fail the step unless the workspace deployment reports Available within 120s.
kubectl wait --for=condition=Available deployment -n kaito-workspace -l app.kubernetes.io/name=workspace --timeout=120s
64+
65+
- name: Build and deploy controller
66+
run: |
67+
# Build the controller image under a local-only tag, load that image into the
# Kind cluster, and deploy the controller using the same tag.
make controller-docker-build CONTROLLER_IMG=kubeairunway-controller:e2e
68+
kind load docker-image kubeairunway-controller:e2e --name kubeairunway-gw-e2e
69+
make controller-deploy CONTROLLER_IMG=kubeairunway-controller:e2e
70+
# Fail the step unless the controller-manager deployment is Available within 120s.
kubectl wait --for=condition=Available deployment -n kubeairunway-system -l control-plane=controller-manager --timeout=120s
71+
72+
- name: Build and deploy KAITO provider
73+
run: |
74+
# Build the KAITO provider image under a local-only tag, load it into the Kind
# cluster, and deploy the provider using the same tag.
make kaito-provider-docker-build KAITO_PROVIDER_IMG=kaito-provider:e2e
75+
kind load docker-image kaito-provider:e2e --name kubeairunway-gw-e2e
76+
make kaito-provider-deploy KAITO_PROVIDER_IMG=kaito-provider:e2e
77+
# Fail the step unless the kaito-provider deployment is Available within 120s.
kubectl wait --for=condition=Available deployment -n kubeairunway-system -l control-plane=kaito-provider --timeout=120s
78+
79+
- name: Wait for provider registration
80+
run: |
81+
# Wait up to 120s for the "kaito" InferenceProviderConfig to report
# .status.ready=true; the step fails if that does not happen in time.
kubectl wait --for=jsonpath='{.status.ready}'=true inferenceproviderconfig/kaito --timeout=120s
82+
83+
- name: Create Gateway resource
84+
run: |
85+
# Create the Gateway fixture, then poll its "Programmed" condition up to 30
# times, 5 seconds apart. Never becoming programmed only produces a warning;
# the step itself still succeeds.
kubectl apply -f controller/test/e2e/testdata/gateway.yaml
echo "Waiting for Gateway to be programmed..."
max_tries=30
for ((try = 1; try <= max_tries; try++)); do
  # A failed lookup is treated the same as "condition not reported yet".
  gw_state=$(kubectl get gateway inference-gateway -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' 2>/dev/null || echo "")
  case "$gw_state" in
    True)
      echo "✅ Gateway is programmed"
      break
      ;;
  esac
  echo "Attempt ${try}/30: programmed=${gw_state}"
  if ((try == max_tries)); then
    echo "⚠️ Gateway not programmed after 30 attempts, continuing anyway (Kind may not support LoadBalancer)"
  fi
  sleep 5
done
99+
100+
- name: Create ModelDeployment with gateway enabled
101+
run: |
102+
# Apply the ModelDeployment test fixture. NOTE(review): the step name says the
# gateway is enabled in this manifest; the manifest itself is not visible here.
kubectl apply -f controller/test/e2e/testdata/gateway-modeldeployment.yaml
103+
104+
- name: Wait for ModelDeployment to reach Running phase
105+
run: |
106+
# Best-effort wait on the Workspace first: its outcome is deliberately ignored
# (output hidden, failure swallowed) because the phase poll below is the gate.
kubectl wait --for=condition=WorkspaceSucceeded workspace/llama-gw-e2e -n default --timeout=600s 2>/dev/null || true

# Poll the ModelDeployment phase up to 60 times, 10 seconds apart. The step
# exits 0 as soon as the phase is Running and exits 1 once attempts run out.
echo "Waiting for ModelDeployment to reach Running phase..."
try=0
while [ "$try" -lt 60 ]; do
  try=$((try + 1))
  md_phase=$(kubectl get modeldeployment llama-gw-e2e -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
  echo "Attempt ${try}/60: phase=${md_phase}"
  if [ "$md_phase" != "Running" ]; then
    sleep 10
    continue
  fi
  echo "✅ ModelDeployment is Running"
  exit 0
done
echo "❌ Timed out waiting for ModelDeployment to reach Running phase"
exit 1
120+
121+
- name: Verify InferencePool created
122+
run: |
123+
# Verify that the controller created an InferencePool named llama-gw-e2e with
# the expected selector label and a non-empty endpointPickerRef.

# Print one field of the InferencePool; $1 is the jsonpath expression.
pool_field() {
  kubectl get inferencepool llama-gw-e2e -n default -o jsonpath="$1"
}

# Poll for existence: up to 30 lookups, 5 seconds apart.
echo "Waiting for InferencePool..."
try=1
until kubectl get inferencepool llama-gw-e2e -n default > /dev/null 2>&1; do
  echo "Attempt ${try}/30: InferencePool not found yet"
  if [ "$try" -ge 30 ]; then
    echo "❌ Timed out waiting for InferencePool"
    exit 1
  fi
  sleep 5
  try=$((try + 1))
done
echo "✅ InferencePool found"

# The selector must target pods labelled with this ModelDeployment's name.
pool_selector=$(pool_field '{.spec.selector.matchLabels.kubeairunway\.ai/model-deployment}')
if [ "$pool_selector" = "llama-gw-e2e" ]; then
  echo "✅ InferencePool selector correct"
else
  echo "❌ InferencePool selector mismatch: expected 'llama-gw-e2e', got '$pool_selector'"
  exit 1
fi

# Any non-empty endpoint picker name is accepted.
picker_name=$(pool_field '{.spec.endpointPickerRef.name}')
if [ -n "$picker_name" ]; then
  echo "✅ InferencePool endpointPickerRef set: $picker_name"
else
  echo "❌ InferencePool missing endpointPickerRef"
  exit 1
fi
154+
155+
- name: Verify HTTPRoute created
156+
run: |
157+
# Verify that the controller created an HTTPRoute named llama-gw-e2e that is
# attached to inference-gateway and routes to an InferencePool backend.

# Print one field of the HTTPRoute; $1 is the jsonpath expression.
route_field() {
  kubectl get httproute llama-gw-e2e -n default -o jsonpath="$1"
}

# Poll for existence: up to 30 lookups, 5 seconds apart.
echo "Waiting for HTTPRoute..."
try=1
until kubectl get httproute llama-gw-e2e -n default > /dev/null 2>&1; do
  echo "Attempt ${try}/30: HTTPRoute not found yet"
  if [ "$try" -ge 30 ]; then
    echo "❌ Timed out waiting for HTTPRoute"
    exit 1
  fi
  sleep 5
  try=$((try + 1))
done
echo "✅ HTTPRoute found"

# The first parentRef must name the test Gateway.
route_parent=$(route_field '{.spec.parentRefs[0].name}')
if [ "$route_parent" = "inference-gateway" ]; then
  echo "✅ HTTPRoute parent ref correct"
else
  echo "❌ HTTPRoute parent mismatch: expected 'inference-gateway', got '$route_parent'"
  exit 1
fi

# The first backendRef of the first rule must be an InferencePool in the
# inference.networking.k8s.io API group.
ref_group=$(route_field '{.spec.rules[0].backendRefs[0].group}')
ref_kind=$(route_field '{.spec.rules[0].backendRefs[0].kind}')
if ! { [ "$ref_group" = "inference.networking.k8s.io" ] && [ "$ref_kind" = "InferencePool" ]; }; then
  echo "❌ HTTPRoute backend ref mismatch: group=$ref_group kind=$ref_kind"
  exit 1
fi
echo "✅ HTTPRoute backend ref correct"
190+
191+
- name: Verify gateway status and model name auto-discovery
192+
run: |
193+
# Wait for the ModelDeployment's GatewayReady condition to become True (30
# lookups, 5 seconds apart), then require a non-empty status.gateway.modelName.
echo "Waiting for GatewayReady condition..."
try=1
while :; do
  # A failed lookup is treated the same as "condition not reported yet".
  ready_state=$(kubectl get modeldeployment llama-gw-e2e -n default \
    -o jsonpath='{.status.conditions[?(@.type=="GatewayReady")].status}' 2>/dev/null || echo "")
  if [ "$ready_state" = "True" ]; then
    echo "✅ GatewayReady condition is True"
    break
  fi
  echo "Attempt ${try}/30: GatewayReady=${ready_state}"
  if [ "$try" -eq 30 ]; then
    echo "❌ Timed out waiting for GatewayReady condition"
    exit 1
  fi
  sleep 5
  try=$((try + 1))
done

# The controller is expected to have published the served model name in status.
discovered_model=$(kubectl get modeldeployment llama-gw-e2e -n default \
  -o jsonpath='{.status.gateway.modelName}')
if [ -n "$discovered_model" ]; then
  echo "✅ Gateway model name auto-discovered: $discovered_model"
else
  echo "❌ Gateway model name is empty"
  exit 1
fi
217+
218+
- name: Wait for EPP to be ready
219+
run: |
220+
# Poll the llama-gw-e2e-epp deployment until it reports exactly one ready
# replica: up to 30 lookups, 10 seconds apart, failing the step on timeout.
echo "Waiting for EPP deployment..."
max_tries=30
for ((try = 1; try <= max_tries; try++)); do
  # If the lookup itself fails (e.g. deployment not created yet), report 0.
  ready_count=$(kubectl get deployment llama-gw-e2e-epp -n default -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")
  case "$ready_count" in
    1)
      echo "✅ EPP is ready"
      break
      ;;
  esac
  echo "Attempt ${try}/30: EPP readyReplicas=${ready_count}"
  if ((try == max_tries)); then
    echo "❌ EPP not ready"
    exit 1
  fi
  sleep 10
done
234+
235+
- name: Configure Istio DestinationRule for EPP
236+
run: |
237+
# Create an Istio DestinationRule for the EPP service from an inline manifest.
# The heredoc delimiter is quoted ('DREOF'), so the manifest reaches kubectl
# literally, with no shell expansion.
# The rule sets TLS mode SIMPLE for traffic to the EPP host and disables
# certificate verification (insecureSkipVerify: true).
# NOTE(review): presumably the EPP serves a self-signed certificate — confirm.
kubectl apply -f - <<'DREOF'
238+
apiVersion: networking.istio.io/v1beta1
239+
kind: DestinationRule
240+
metadata:
241+
name: llama-gw-e2e-epp
242+
namespace: default
243+
spec:
244+
host: llama-gw-e2e-epp.default.svc.cluster.local
245+
trafficPolicy:
246+
tls:
247+
mode: SIMPLE
248+
insecureSkipVerify: true
249+
DREOF
250+
echo "✅ Istio DestinationRule created for EPP"
251+
252+
- name: Install Body-Based Router (BBR)
253+
run: |
254+
# Install the body-based-routing chart (pinned to v1.3.1) from the OCI
# registry with the provider set to istio. --wait with --timeout 120s makes
# helm block until the release is ready or the timeout expires.
helm install body-based-router \
255+
--set provider.name=istio \
256+
--version v1.3.1 \
257+
oci://registry.k8s.io/gateway-api-inference-extension/charts/body-based-routing \
258+
--wait --timeout 120s
259+
echo "✅ BBR installed"
260+
261+
- name: Test inference through gateway
262+
run: |
263+
# End-to-end check: send a chat-completion request to the Gateway's address
# and require HTTP 200 with a .choices[0].message.content field in the reply.
# The request is retried up to 18 times, 10 seconds apart.

MODEL_NAME=$(kubectl get modeldeployment llama-gw-e2e -n default \
  -o jsonpath='{.status.gateway.modelName}')
echo "Model name: $MODEL_NAME"
# Without a model name every request below would be rejected; fail now
# instead of burning the whole retry budget.
if [ -z "$MODEL_NAME" ]; then
  echo "❌ Gateway model name is empty"
  exit 1
fi

# Get the Gateway LoadBalancer IP (provided by cloud-provider-kind)
GW_IP=""
for i in $(seq 1 30); do
  GW_IP=$(kubectl get gateway inference-gateway -o jsonpath='{.status.addresses[0].value}' 2>/dev/null || echo "")
  if [ -n "$GW_IP" ]; then
    echo "Gateway IP: $GW_IP"
    break
  fi
  echo "Waiting for Gateway IP... attempt $i/30"
  sleep 5
done

if [ -z "$GW_IP" ]; then
  echo "❌ Gateway IP not assigned"
  exit 1
fi

# Use a private scratch file rather than a fixed /tmp path, and remove it on
# every exit path.
RESPONSE_FILE=$(mktemp)
trap 'rm -f -- "$RESPONSE_FILE"' EXIT

# Build the request body with jq so the model name is JSON-escaped instead of
# being spliced into a hand-written JSON string.
PAYLOAD=$(jq -n --arg model "$MODEL_NAME" \
  '{model: $model, messages: [{role: "user", content: "Say hello in one word."}], max_tokens: 10}')

echo "Sending inference request through gateway at http://${GW_IP}..."
for i in $(seq 1 18); do
  # Start each attempt from an empty file so a request that never connects
  # cannot be logged with the previous attempt's body.
  : > "$RESPONSE_FILE"
  # curl still prints the status code (000 on connection failure) when it
  # exits non-zero, so its exit status is deliberately ignored here.
  HTTP_CODE=$(curl -s -o "$RESPONSE_FILE" -w '%{http_code}' --max-time 30 \
    "http://${GW_IP}/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" 2>/dev/null || true)
  RESPONSE=$(cat "$RESPONSE_FILE")

  if [ "$HTTP_CODE" = "200" ] && jq -e '.choices[0].message.content' "$RESPONSE_FILE" > /dev/null 2>&1; then
    echo "Response: $RESPONSE"
    echo "✅ Inference through gateway succeeded"
    exit 0
  fi
  # Quote the body (no word splitting or glob expansion) and squeeze
  # whitespace so the truncated preview stays on one log line.
  echo "Attempt $i/18: HTTP=$HTTP_CODE body=$(printf '%s' "$RESPONSE" | tr -s '[:space:]' ' ' | head -c 200)"
  sleep 10
done
echo "❌ Inference through gateway failed"
exit 1
306+
307+
- name: Test gateway disable and cleanup
308+
run: |
309+
# Disable gateway
310+
# Disable the gateway on the ModelDeployment and verify that the controller
# removes the InferencePool and HTTPRoute and reports GatewayReady=False.
kubectl patch modeldeployment llama-gw-e2e -n default \
  --type=merge -p '{"spec":{"gateway":{"enabled":false}}}'

echo "Waiting for gateway resources to be cleaned up..."

# Poll for deletion instead of sleeping a fixed interval and checking once:
# a fixed sleep is either too short (flaky) or wastes time.
# --ignore-not-found turns "absent" into an empty, successful result, so a
# genuine kubectl error aborts the step rather than being read as "deleted".
for resource in inferencepool httproute; do
  case "$resource" in
    inferencepool) label="InferencePool" ;;
    httproute) label="HTTPRoute" ;;
  esac
  deleted=false
  for i in $(seq 1 30); do
    remaining=$(kubectl get "$resource" llama-gw-e2e -n default --ignore-not-found -o name)
    if [ -z "$remaining" ]; then
      deleted=true
      break
    fi
    echo "Attempt $i/30: $label still present"
    sleep 2
  done
  if [ "$deleted" != "true" ]; then
    echo "❌ $label should have been deleted"
    exit 1
  fi
  echo "✅ $label cleaned up"
done

# The status condition may lag behind resource deletion, so poll it as well.
GW_READY=""
for i in $(seq 1 30); do
  GW_READY=$(kubectl get modeldeployment llama-gw-e2e -n default \
    -o jsonpath='{.status.conditions[?(@.type=="GatewayReady")].status}')
  if [ "$GW_READY" = "False" ]; then
    break
  fi
  echo "Attempt $i/30: GatewayReady=$GW_READY"
  sleep 2
done
if [ "$GW_READY" != "False" ]; then
  echo "❌ GatewayReady condition should be False after disable: $GW_READY"
  exit 1
fi
echo "✅ GatewayReady condition is False after disable"
338+
339+
- name: Collect debug info
340+
if: failure()
341+
run: |
342+
# Dump cluster state and component logs after a failed run.
# Collection is best-effort: errexit is turned off so that one failing command
# (for example a CRD that was never installed because the job failed early)
# does not abort the step and discard the remaining diagnostics.
set +e

echo "=== ModelDeployments ==="
kubectl get modeldeployments -A -o yaml
echo "=== InferencePools ==="
kubectl get inferencepools -A -o yaml 2>/dev/null || echo "No InferencePools"
echo "=== HTTPRoutes ==="
kubectl get httproutes -A -o yaml 2>/dev/null || echo "No HTTPRoutes"
echo "=== Gateways ==="
kubectl get gateways -A -o yaml 2>/dev/null || echo "No Gateways"
echo "=== Workspaces ==="
kubectl get workspaces -A -o yaml
echo "=== Controller Logs ==="
kubectl logs -n kubeairunway-system -l control-plane=controller-manager --tail=200
echo "=== KAITO Provider Logs ==="
kubectl logs -n kubeairunway-system -l control-plane=kaito-provider --tail=100
echo "=== EPP Logs ==="
kubectl logs -n default -l app.kubernetes.io/name=llama-gw-e2e-epp --tail=100 2>/dev/null || echo "No EPP logs"
echo "=== Istio Logs ==="
kubectl logs -n istio-system -l app=istiod --tail=100 2>/dev/null || echo "No Istio logs"
echo "=== Gateway Proxy Logs ==="
# Look up the first pod backing the Gateway; the lookup fails when no such
# pod exists, in which case the name is left empty.
GW_POD=$(kubectl get pods -n default -l gateway.networking.k8s.io/gateway-name=inference-gateway -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
# Explicit if/else instead of an `A && B || C` chain: print the fallback when
# there is no pod or when fetching its logs fails.
if [ -z "$GW_POD" ] || ! kubectl logs "$GW_POD" -n default --tail=50 2>/dev/null; then
  echo "No gateway proxy logs"
fi
echo "=== Gateway Pods ==="
kubectl get pods -n default -l gateway.networking.k8s.io/gateway-name=inference-gateway -o yaml
echo "=== Events ==="
kubectl get events -A --sort-by=.lastTimestamp
echo "=== Pods ==="
kubectl get pods -A
369+
370+
- name: Cleanup
371+
if: always()
372+
run: |
373+
# Delete the Kind cluster created earlier in this job. The step is marked
# `if: always()`, so it runs whether or not earlier steps succeeded.
kind delete cluster --name kubeairunway-gw-e2e

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ backend/src/embedded-assets.ts
5353
go.work
5454
go.work.sum
5555

56+
# Provider build outputs
57+
providers/*/bin/
58+
5659
# Playwright
5760
/test-results/
5861
/playwright-report/

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
# Controller image
99
CONTROLLER_IMG ?= ghcr.io/kaito-project/kubeairunway-controller:latest
1010

11+
# Gateway API Inference Extension version
12+
GAIE_VERSION ?= v1.3.1
13+
1114
# Provider images
1215
KAITO_PROVIDER_IMG ?= ghcr.io/kaito-project/kaito-provider:latest
1316
DYNAMO_PROVIDER_IMG ?= ghcr.io/kaito-project/dynamo-provider:latest

0 commit comments

Comments
 (0)