@@ -61,7 +61,7 @@ var _ = Describe("Test workload-variant-autoscaler - GPU Limiter Feature", Order
6161 var (
6262 ctx context.Context
6363
64- // Resource names - following saturation test pattern
64+ // Resource names
6565 name string
6666 deployName string
6767 serviceName string
@@ -258,7 +258,6 @@ enableLimiter: true`
258258
259259 Context ("Scenario 2: Scale-up under load with limiter constraint" , func () {
260260 It ("should scale up when saturation is detected but be constrained by GPU capacity" , func () {
261- // Following saturation test pattern exactly
262261 By ("setting up port-forward to Prometheus service" )
263262 prometheusPortForwardCmd := utils .SetUpPortForward (k8sClient , ctx , "kube-prometheus-stack-prometheus" , controllerMonitoringNamespace , prometheusLocalPort , 9090 )
264263 defer func () {
@@ -338,12 +337,27 @@ enableLimiter: true`
338337 g .Expect (finalReplicas ).To (BeNumerically (">" , int (initialReplicas )),
339338 fmt .Sprintf ("Should scale up from %d under load" , initialReplicas ))
340339
341- // Limiter should cap the scale-up at max replicas for the GPU type
340+ }, 10 * time .Minute , 10 * time .Second ).Should (Succeed ())
341+
342+ By ("verifying scale-up is constrained by GPU capacity via limiter" )
343+ Consistently (func (g Gomega ) {
344+ va := & v1alpha1.VariantAutoscaling {}
345+ err := crClient .Get (ctx , client.ObjectKey {
346+ Namespace : namespace ,
347+ Name : name ,
348+ }, va )
349+ g .Expect (err ).NotTo (HaveOccurred ())
350+
351+ finalReplicas = va .Status .DesiredOptimizedAlloc .NumReplicas
352+ _ , _ = fmt .Fprintf (GinkgoWriter , "Checking DesiredOptimizedAlloc.NumReplicas=%d against max=%d\n " ,
353+ finalReplicas , maxReplicasOnNode )
354+
355+ // Final replicas should not exceed max allowed by GPU capacity
342356 g .Expect (finalReplicas ).To (BeNumerically ("<=" , maxReplicasOnNode ),
343- fmt .Sprintf ("Limiter should cap replicas at %d (%d GPUs / %d GPUs per replica) " ,
344- maxReplicasOnNode , 4 , gpusPerReplicaLimiter ))
357+ fmt .Sprintf ("Final replicas %d should be less than or equal to max %d due to GPU limiter " ,
358+ finalReplicas , maxReplicasOnNode ))
345359
346- }, 10 * time .Minute , 10 * time .Second ).Should (Succeed ())
360+ }, 2 * time .Minute , 10 * time .Second ).Should (Succeed ())
347361
348362 By ("logging VariantAutoscaling status after scale-up" )
349363 err = utils .LogVariantAutoscalingStatus (ctx , name , namespace , crClient , GinkgoWriter )
0 commit comments