fix: resolve compilation errors in e2e limiter test

ev-shindin · mamy-CS · commit da65866aca9c · 2026-02-10T12:44:46.000-05:00
- Declare err with := in It scope (was only declared inside defer closure)
- Remove the scaledUp flag; assert desiredReplicas > initialReplicas directly instead
- Replace undefined finalReplicas references with desiredReplicas
diff --git a/test/e2e-saturation-based/e2e_limiter_test.go b/test/e2e-saturation-based/e2e_limiter_test.go
@@ -357,7 +357,7 @@ enableLimiter: true`
 			}()
 
 			By("waiting for Prometheus port-forward to be ready")
-			err = utils.VerifyPortForwardReadiness(ctx, prometheusLocalPort, fmt.Sprintf("https://localhost:%d/api/v1/query?query=up", prometheusLocalPort))
+			err := utils.VerifyPortForwardReadiness(ctx, prometheusLocalPort, fmt.Sprintf("https://localhost:%d/api/v1/query?query=up", prometheusLocalPort))
 			Expect(err).NotTo(HaveOccurred(), "Prometheus port-forward should be ready within timeout")
 
 			By("starting HIGH load generation to trigger scale-up beyond GPU capacity")
@@ -402,7 +402,6 @@ enableLimiter: true`
 
 			By("waiting for saturation detection and verifying limiter constraint")
 			var desiredReplicas int
-			var scaledUp bool
 
 			// First, wait for scale-up to occur (proves saturation was detected)
 			Eventually(func(g Gomega) {
@@ -423,12 +422,9 @@ enableLimiter: true`
 				g.Expect(accelerator).NotTo(BeEmpty(),
 					"DesiredOptimizedAlloc.Accelerator should be populated when metrics are flowing")
 
-				// Should scale up from initial replica
-				if desiredReplicas > int(initialReplicas) {
-					scaledUp = true
-				}
-				g.Expect(scaledUp).To(BeTrue(),
-					fmt.Sprintf("Should scale up from %d under heavy load", initialReplicas))
+				// Should scale up beyond the initial replica count due to saturation
+				g.Expect(desiredReplicas).To(BeNumerically(">", int(initialReplicas)),
+					fmt.Sprintf("Should scale up from %d under load", initialReplicas))
 
 			}, 10*time.Minute, 10*time.Second).Should(Succeed())
 
@@ -438,8 +434,19 @@ enableLimiter: true`
 				fmt.Sprintf("Limiter should cap DesiredOptimizedAlloc.NumReplicas at %d (%d GPUs / %d GPUs per replica)",
 					maxReplicasOnNode, gpusOnTargetNode, gpusPerReplicaLimiter))
 
-			By("logging VariantAutoscaling status after limiter constraint test")
-			err = utils.LogVariantAutoscalingStatus(ctx, deployName, namespace, crClient, GinkgoWriter)
+				desiredReplicas = va.Status.DesiredOptimizedAlloc.NumReplicas
+				_, _ = fmt.Fprintf(GinkgoWriter, "Checking DesiredOptimizedAlloc.NumReplicas=%d against max=%d\n",
+					desiredReplicas, maxReplicasOnNode)
+
+				// Final replicas should not exceed max allowed by GPU capacity
+				g.Expect(desiredReplicas).To(BeNumerically("<=", maxReplicasOnNode),
+					fmt.Sprintf("Final replicas %d should be less than or equal to max %d due to GPU limiter",
+						desiredReplicas, maxReplicasOnNode))
+
+			}, 2*time.Minute, 10*time.Second).Should(Succeed())
+
+			By("logging VariantAutoscaling status after scale-up")
+			err = utils.LogVariantAutoscalingStatus(ctx, name, namespace, crClient, GinkgoWriter)
 			Expect(err).NotTo(HaveOccurred(), "Should be able to log VariantAutoscaling status")
 
 			_, _ = fmt.Fprintf(GinkgoWriter, "Limiter successfully constrained scale-up: desiredReplicas=%d (GPU max=%d)\n",