fix: handle KinD/minikube (no LoadBalancer) in deploy and conformance scripts

aneeshkp · aneeshkp · commit bd24da7c7067 · 2026-03-26T13:27:41.000-04:00
Signed-off-by: Aneesh Puttur <aneeshputtur@gmail.com>
diff --git a/Makefile b/Makefile
@@ -21,6 +21,10 @@ help:
 	@echo "  make undeploy            - Remove all infrastructure"
 	@echo "  make undeploy-kserve     - Remove KServe"
 	@echo ""
+	@echo "Mock model (no GPU):"
+	@echo "  make deploy-mock-model   - Deploy mock LLMInferenceService"
+	@echo "  make clean-mock-model    - Clean up mock deployment"
+	@echo ""
 	@echo "Other:"
 	@echo "  make status              - Show deployment status"
 	@echo "  make test                - Run ODH conformance tests"
diff --git a/scripts/setup-gateway.sh b/scripts/setup-gateway.sh
@@ -165,14 +165,24 @@ spec:
       name: ${GATEWAY_NAME}-config
 EOF
 
-  # Wait for Gateway to be programmed
-  log_wait "Waiting for Gateway to be programmed..."
-  if kubectl wait --for=condition=Programmed gateway/"${GATEWAY_NAME}" -n "${KSERVE_NAMESPACE}" --timeout=120s; then
-    log_success "Gateway created and programmed: ${GATEWAY_NAME}"
+  # Wait for Gateway to be accepted (Programmed requires a LoadBalancer which
+  # is not available on KinD/minikube)
+  log_wait "Waiting for Gateway to be accepted..."
+  kubectl wait --for=condition=Accepted gateway/"${GATEWAY_NAME}" -n "${KSERVE_NAMESPACE}" --timeout=120s
+  log_success "Gateway accepted: ${GATEWAY_NAME}"
+
+  # Wait for Programmed (external IP) — skip on platforms without LoadBalancer
+  if kubectl get nodes -o jsonpath='{.items[0].spec.providerID}' 2>/dev/null | grep -qE "^(kind|minikube)://"; then
+    log_info "Skipping Programmed check (no LoadBalancer on KinD/minikube)"
   else
-    log_error "Gateway failed to become programmed within timeout"
-    kubectl get gateway "${GATEWAY_NAME}" -n "${KSERVE_NAMESPACE}" -o yaml
-    return 1
+    log_wait "Waiting for Gateway to be programmed..."
+    if kubectl wait --for=condition=Programmed gateway/"${GATEWAY_NAME}" -n "${KSERVE_NAMESPACE}" --timeout=120s; then
+      log_success "Gateway programmed with external IP: ${GATEWAY_NAME}"
+    else
+      log_error "Gateway failed to become programmed within timeout"
+      kubectl get gateway "${GATEWAY_NAME}" -n "${KSERVE_NAMESPACE}" -o yaml
+      return 1
+    fi
   fi
 }
 
diff --git a/test/conformance/verify-llm-d-deployment.sh b/test/conformance/verify-llm-d-deployment.sh
@@ -982,16 +982,15 @@ check_llminferenceservice_resources() {
         # Check each one's status
         local ready_count=0
         local not_ready=()
-        while IFS= read -r line; do
-            local name ready
-            name=$(echo "$line" | awk '{print $1}')
-            ready=$(echo "$line" | awk '{print $3}')
+        while IFS= read -r name; do
+            local ready
+            ready=$($KUBECTL get llminferenceservice "$name" -n "$LLMD_NAMESPACE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "")
             if [[ "$ready" == "True" ]]; then
                 ((ready_count++))
             else
                 not_ready+=("$name")
             fi
-        done < <($KUBECTL get llminferenceservice -n "$LLMD_NAMESPACE" --no-headers 2>/dev/null)
+        done < <($KUBECTL get llminferenceservice -n "$LLMD_NAMESPACE" --no-headers -o custom-columns=NAME:.metadata.name 2>/dev/null)
 
         if [[ "$ready_count" -eq "$llmisvc_count" ]]; then
             log_pass "All $llmisvc_count LLMInferenceService(s) are Ready"
diff --git a/test/deploy-model.sh b/test/deploy-model.sh
@@ -142,12 +142,27 @@ kubectl get pods -n "$NAMESPACE"
 # Test inference via the service URL
 SERVICE_URL=$(kubectl get llmisvc "$MODEL_NAME" -n "$NAMESPACE" -o jsonpath='{.status.url}')
 echo ""
-echo "[INFO] Testing inference at $SERVICE_URL ..."
+
+if [[ -z "$SERVICE_URL" ]]; then
+    echo "[INFO] No external URL (no LoadBalancer — expected on KinD/minikube)"
+    echo "[INFO] Testing inference via port-forward to mock pod..."
+    POD_NAME=$(kubectl get pod -n "$NAMESPACE" -l app.kubernetes.io/name="$MODEL_NAME",kserve.io/component=workload -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
+    kubectl port-forward -n "$NAMESPACE" "pod/$POD_NAME" 8000:8000 &
+    PF_PID=$!
+    sleep 3
+    SERVICE_URL="https://localhost:8000"
+    CLEANUP_PF=true
+else
+    echo "[INFO] Testing inference at $SERVICE_URL ..."
+    CLEANUP_PF=false
+fi
 
 RESPONSE=$(curl -s -k --max-time 30 -X POST "${SERVICE_URL}/v1/chat/completions" \
     -H "Content-Type: application/json" \
     -d '{"model":"mock-model","messages":[{"role":"user","content":"Hello"}],"max_tokens":20}')
 
+[[ "$CLEANUP_PF" == "true" ]] && kill $PF_PID 2>/dev/null || true
+
 if echo "$RESPONSE" | python3 -c "import sys,json; d=json.load(sys.stdin); print('[PASS] Response:', d['choices'][0]['message']['content'])" 2>/dev/null; then
     echo ""
     echo "=== Mock model deployed successfully ==="
@@ -156,10 +171,10 @@ if echo "$RESPONSE" | python3 -c "import sys,json; d=json.load(sys.stdin); print
     echo "  URL:                 $SERVICE_URL"
     echo "  Cleanup:             make clean-mock-model"
 else
-    echo "[WARN] Inference test via gateway failed (gateway may not be configured)"
+    echo "[WARN] Inference test failed"
     echo "  Response: $RESPONSE"
     echo ""
-    echo "=== Mock model deployed (inference via gateway not verified) ==="
+    echo "=== Mock model deployed (inference not verified) ==="
     echo "  LLMInferenceService: $MODEL_NAME"
     echo "  Namespace:           $NAMESPACE"
     echo "  Cleanup:             make clean-mock-model"