Skip to content

Commit 01c6d8a

Browse files
committed
fix: add recovery wait and retry for WVA test after HPA load
After the HPA test hammers the model server with 600s of heavy prefill load, the pod can become degraded (it reports ready but returns HTTP 502). Add a 30s recovery sleep plus an ensureInfraDeploymentReady call before the WVA test, and retry the direct connectivity check for up to 3 minutes.

Made-with: Cursor
1 parent c22910c commit 01c6d8a

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

test/benchmark/prefill_heavy_benchmark_test.go

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -270,9 +270,11 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
270270
GinkgoWriter.Println("Falling back to direct model server connection (bypassing Gateway/EPP)")
271271
targetURL = ensureDirectModelService()
272272

273-
By("Verifying direct model server connectivity")
274-
directErr := fixtures.VerifyGatewayConnectivity(ctx, k8sClient, benchCfg.LLMDNamespace, targetURL, benchCfg.ModelID)
275-
Expect(directErr).NotTo(HaveOccurred(), "Direct model server connectivity check also failed — backend is truly unreachable")
273+
By("Verifying direct model server connectivity (with retries)")
274+
Eventually(func(g Gomega) {
275+
directErr := fixtures.VerifyGatewayConnectivity(ctx, k8sClient, benchCfg.LLMDNamespace, targetURL, benchCfg.ModelID)
276+
g.Expect(directErr).NotTo(HaveOccurred(), "Direct model server not yet reachable")
277+
}, 3*time.Minute, 20*time.Second).Should(Succeed(), "Direct model server connectivity check failed after retries — backend is truly unreachable")
276278
} else {
277279
GinkgoWriter.Println("Gateway connectivity check passed — using Gateway URL")
278280
targetURL = gatewayURL
@@ -515,6 +517,10 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
515517
cleanupAutoscalers()
516518
res.DeploymentName = findInfraDecodeDeployment()
517519

520+
By("Waiting for model server to recover after previous test")
521+
time.Sleep(30 * time.Second)
522+
ensureInfraDeploymentReady()
523+
518524
By("Creating VariantAutoscaling resource (Scale Up: 0s, Scale Down: 240s)")
519525
err := fixtures.EnsureVariantAutoscaling(
520526
ctx, crClient, benchCfg.LLMDNamespace, res.VAName, res.DeploymentName,

0 commit comments

Comments
 (0)