Skip to content

Commit 656490a

Browse files
committed
debug:
- prolong timeout in test - add INFO log to understand why the VA was not found Signed-off-by: Wen Zhou <wenzhou@redhat.com>
1 parent fcc2bc3 commit 656490a

3 files changed

Lines changed: 16 additions & 3 deletions

File tree

deploy/install.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1105,7 +1105,7 @@ deploy_llm_d_infrastructure() {
11051105
fi
11061106

11071107
log_info "Waiting for llm-d components to initialize..."
1108-
kubectl wait --for=condition=Available deployment --all -n $LLMD_NS --timeout=60s || \
1108+
kubectl wait --for=condition=Available deployment --all -n $LLMD_NS --timeout=120s || \
11091109
log_warning "llm-d components are not ready yet - check 'kubectl get pods -n $LLMD_NS'"
11101110

11111111
# Align WVA with the InferencePool API group in use (scale-from-zero requires WVA to watch the same group).

internal/engines/scalefromzero/engine.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,15 @@ func (e *Engine) processInactiveVariant(ctx context.Context, va wvav1alpha1.Vari
253253
// Check for pending requests using EPP flowcontrol queue size metrics
254254
result := results["all_metrics"]
255255
pendingRequestExist := false
256+
var queueMetricFound bool
257+
var queueMetricModels []string
256258
for _, value := range result.Values {
257259
metricName := value.Labels["__name__"]
258260
if metricName == targetEPPMetricName && value.Value > 0 {
259-
if value.Labels[targetEPPMetricLabel] == va.Spec.ModelID {
261+
queueMetricFound = true
262+
modelLabel := value.Labels[targetEPPMetricLabel]
263+
queueMetricModels = append(queueMetricModels, modelLabel)
264+
if modelLabel == va.Spec.ModelID {
260265
logger.Info(
261266
"Target workload has pending requests, scaling up from zero", "metricName", metricName,
262267
"metric", value.Labels, "value", value.Value)
@@ -267,6 +272,13 @@ func (e *Engine) processInactiveVariant(ctx context.Context, va wvav1alpha1.Vari
267272
}
268273

269274
if !pendingRequestExist {
275+
// Log at INFO only when the queue metric reports pending requests but none of them match this VA's model ID
276+
if queueMetricFound {
277+
logger.Info("Scale-from-zero: queue has pending requests but model not matched",
278+
"va", va.Name,
279+
"vaModelID", va.Spec.ModelID,
280+
"queueModels", queueMetricModels)
281+
}
270282
// Scale-from-zero loop runs every 100ms; log at DEBUG to avoid flooding (10/sec per inactive VA).
271283
logger.V(logging.DEBUG).Info("Scale-from-zero: skipping VA, no pending requests in flow control queue",
272284
"va", va.Name,

test/e2e/scale_from_zero_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,9 @@ var _ = Describe("Scale-From-Zero Feature", Label("smoke", "full"), Ordered, fun
326326
GinkgoWriter.Println("Job pod is running and sending requests")
327327

328328
// Give requests time to queue up in EPP before checking for scale-up
329+
// Increased from 10s to 20s for CI environments where the gateway may need more time
329330
By("Waiting for requests to queue up in EPP flow control queue")
330-
time.Sleep(10 * time.Second)
331+
time.Sleep(20 * time.Second)
331332

332333
By("Monitoring VariantAutoscaling for scale-from-zero decision")
333334
Eventually(func(g Gomega) {

0 commit comments

Comments
 (0)