feat(controller,dynamo): add persistent storage support with PVC lifecycle and model download #230

Workflow file for this run

.github/workflows/e2e-controller.yml at 2788e55

	name: E2E Controller Tests

	on:
	push:
	branches: [main]
	pull_request:
	branches: [main]
	workflow_dispatch:

	jobs:
	e2e-controller:
	runs-on: ubuntu-latest-16-cores
	timeout-minutes: 30

	steps:
	- name: Checkout repository
	uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4

	- name: Setup Go
	uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v5
	with:
	go-version: "1.25"
	cache-dependency-path: controller/go.sum

	- name: Setup Kind
	run: \|
	go install sigs.k8s.io/kind@latest
	kind create cluster --name kubeairunway-e2e --wait 120s

	- name: Install KAITO operator
	run: \|
	helm repo add kaito https://kaito-project.github.io/kaito/charts/kaito
	helm install kaito-workspace kaito/workspace \
	--namespace kaito-workspace \
	--create-namespace \
	--set featureGates.disableNodeAutoProvisioning=true
	kubectl wait --for=condition=Available deployment -n kaito-workspace -l app.kubernetes.io/name=workspace --timeout=120s

	- name: Build and deploy controller
	run: \|
	make controller-docker-build CONTROLLER_IMG=kubeairunway-controller:e2e
	kind load docker-image kubeairunway-controller:e2e --name kubeairunway-e2e
	make controller-deploy CONTROLLER_IMG=kubeairunway-controller:e2e
	kubectl wait --for=condition=Available deployment -n kubeairunway-system -l control-plane=controller-manager --timeout=120s

	- name: Build and deploy KAITO provider
	run: \|
	make -C providers/kaito docker-build IMG=kaito-provider:e2e
	kind load docker-image kaito-provider:e2e --name kubeairunway-e2e
	make -C providers/kaito deploy IMG=kaito-provider:e2e
	kubectl wait --for=condition=Available deployment -n kubeairunway-system -l control-plane=kaito-provider --timeout=120s

	- name: Wait for provider registration
	run: \|
	kubectl wait --for=jsonpath='{.status.ready}'=true inferenceproviderconfig/kaito --timeout=120s

	- name: Create CPU-only ModelDeployment
	run: \|
	kubectl apply -f controller/test/e2e/testdata/cpu-modeldeployment.yaml

	- name: Wait for ModelDeployment to reach Running phase
	run: \|
	kubectl wait --for=condition=WorkspaceSucceeded workspace/llama-cpu-e2e -n default --timeout=600s 2>/dev/null \|\| true

	echo "Waiting for ModelDeployment to reach Running phase..."
	for i in $(seq 1 60); do
	PHASE=$(kubectl get modeldeployment llama-cpu-e2e -o jsonpath='{.status.phase}' 2>/dev/null \|\| echo "")
	echo "Attempt $i/60: phase=$PHASE"
	if [ "$PHASE" = "Running" ]; then
	echo "✅ ModelDeployment is Running"
	exit 0
	fi
	sleep 10
	done
	echo "❌ Timed out waiting for ModelDeployment to reach Running phase"
	exit 1

	- name: Test inference endpoint
	run: \|
	# Get the actual service port
	SVC_PORT=$(kubectl get svc llama-cpu-e2e -n default -o jsonpath='{.spec.ports[0].port}')
	echo "Service port: $SVC_PORT"

	kubectl port-forward svc/llama-cpu-e2e 8080:${SVC_PORT} -n default &
	sleep 5

	RESPONSE=$(curl -sf http://localhost:8080/v1/chat/completions \
	-H "Content-Type: application/json" \
	-d '{
	"model": "llama-3.2-1b-instruct",
	"messages": [{"role": "user", "content": "Say hello in one word."}],
	"max_tokens": 10
	}')

	echo "Response: $RESPONSE"

	echo "$RESPONSE" \| jq -e '.choices' > /dev/null
	echo "$RESPONSE" \| jq -e '.choices[0].message.content' > /dev/null

	echo "✅ Inference endpoint responded with valid chat completion"

	- name: Collect debug info
	if: failure()
	run: \|
	echo "=== ModelDeployments ==="
	kubectl get modeldeployments -A -o yaml
	echo "=== InferenceProviderConfigs ==="
	kubectl get inferenceproviderconfigs -o yaml
	echo "=== Workspaces ==="
	kubectl get workspaces -A -o yaml
	echo "=== Controller Logs ==="
	kubectl logs -n kubeairunway-system -l control-plane=controller-manager --tail=100
	echo "=== KAITO Provider Logs ==="
	kubectl logs -n kubeairunway-system -l control-plane=kaito-provider --tail=100
	echo "=== Events ==="
	kubectl get events -A --sort-by=.lastTimestamp
	echo "=== Pods ==="
	kubectl get pods -A

	- name: Cleanup
	if: always()
	run: \|
	kind delete cluster --name kubeairunway-e2e

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

feat(controller,dynamo): add persistent storage support with PVC lifecycle and model download #230

Workflow file

feat(controller,dynamo): add persistent storage support with PVC lifecycle and model download #230

Uh oh!

Workflow file for this run