debug:

zdtsw · zdtsw · commit 656490ae514c · 2026-03-11T09:05:55.000+01:00
- prolong timeout in test
- add an INFO log to understand why the VA was not found

Signed-off-by: Wen Zhou <wenzhou@redhat.com>
diff --git a/deploy/install.sh b/deploy/install.sh
@@ -1105,7 +1105,7 @@ deploy_llm_d_infrastructure() {
     fi
 
     log_info "Waiting for llm-d components to initialize..."
-    kubectl wait --for=condition=Available deployment --all -n $LLMD_NS --timeout=60s || \
+    kubectl wait --for=condition=Available deployment --all -n $LLMD_NS --timeout=120s || \
         log_warning "llm-d components are not ready yet - check 'kubectl get pods -n $LLMD_NS'"
 
     # Align WVA with the InferencePool API group in use (scale-from-zero requires WVA to watch the same group).
diff --git a/internal/engines/scalefromzero/engine.go b/internal/engines/scalefromzero/engine.go
@@ -253,10 +253,15 @@ func (e *Engine) processInactiveVariant(ctx context.Context, va wvav1alpha1.Vari
 	// Check for pending requests using EPP flowcontrol queue size metrics
 	result := results["all_metrics"]
 	pendingRequestExist := false
+	var queueMetricFound bool
+	var queueMetricModels []string
 	for _, value := range result.Values {
 		metricName := value.Labels["__name__"]
 		if metricName == targetEPPMetricName && value.Value > 0 {
-			if value.Labels[targetEPPMetricLabel] == va.Spec.ModelID {
+			queueMetricFound = true
+			modelLabel := value.Labels[targetEPPMetricLabel]
+			queueMetricModels = append(queueMetricModels, modelLabel)
+			if modelLabel == va.Spec.ModelID {
 				logger.Info(
 					"Target workload has pending requests, scaling up from zero", "metricName", metricName,
 					"metric", value.Labels, "value", value.Value)
@@ -267,6 +272,13 @@ func (e *Engine) processInactiveVariant(ctx context.Context, va wvav1alpha1.Vari
 	}
 
 	if !pendingRequestExist {
+		// Log INFO only when queue exists but model doesn't match
+		if queueMetricFound {
+			logger.Info("Scale-from-zero: queue has pending requests but model not matched",
+				"va", va.Name,
+				"vaModelID", va.Spec.ModelID,
+				"queueModels", queueMetricModels)
+		}
 		// Scale-from-zero loop runs every 100ms; log at DEBUG to avoid flooding (10/sec per inactive VA).
 		logger.V(logging.DEBUG).Info("Scale-from-zero: skipping VA, no pending requests in flow control queue",
 			"va", va.Name,
diff --git a/test/e2e/scale_from_zero_test.go b/test/e2e/scale_from_zero_test.go
@@ -326,8 +326,9 @@ var _ = Describe("Scale-From-Zero Feature", Label("smoke", "full"), Ordered, fun
 			GinkgoWriter.Println("Job pod is running and sending requests")
 
 			// Give requests time to queue up in EPP before checking for scale-up
+			// Increased from 10s to 20s for CI environments where gateway may need more time
 			By("Waiting for requests to queue up in EPP flow control queue")
-			time.Sleep(10 * time.Second)
+			time.Sleep(20 * time.Second)
 
 			By("Monitoring VariantAutoscaling for scale-from-zero decision")
 			Eventually(func(g Gomega) {