@@ -75,7 +75,7 @@ var prefillResults []PrefillResult
7575
7676const prefillResultsFile = "/tmp/prefill-benchmark-results.json"
7777
78- var _ = Describe ("Prefill Heavy Workload Benchmark" , Ordered , Label ("benchmark" , "phase3a " ), func () {
78+ var _ = Describe ("Scaling Benchmark" , Ordered , Label ("benchmark" ), func () {
7979 var (
8080 ctx context.Context
8181 cancel context.CancelFunc
@@ -439,7 +439,7 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Ordered, Label("benchmark",
439439 }
440440 }
441441
442- runPrefillBenchmark := func (autoscalerType string ) {
442+ runBenchmarkScenario := func (autoscalerType string , scenarioName string ) {
443443 ensureEPPConfig ()
444444 ensureInfraDeploymentReady ()
445445 verifyEPPConfig ()
@@ -487,9 +487,13 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Ordered, Label("benchmark",
487487
488488 By ("Launching GuideLLM Load Generator" )
489489
490+ scenario := LoadScenario (scenarioName )
491+ GinkgoWriter .Printf (" Scenario: %s (prompt=%d, output=%d, rate=%d)\n " ,
492+ scenario .Name , scenario .PromptTokens , scenario .OutputTokens , scenario .Rate )
493+
490494 err = CreateGuideLLMJobWithArgs (
491495 ctx , k8sClient , benchCfg .LLMDNamespace , res .ModelService ,
492- targetURL , benchCfg .ModelID ,
496+ targetURL , benchCfg .ModelID , scenario ,
493497 )
494498 Expect (err ).NotTo (HaveOccurred (), "Failed to create GuideLLM load job" )
495499
@@ -806,7 +810,7 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Ordered, Label("benchmark",
806810 }
807811
808812 GinkgoWriter .Printf ("\n ┌────────────────────────────────────────────────────────────\n " )
809- GinkgoWriter .Printf (" │ %s PREFILL BENCHMARK RESULTS\n " , autoscalerType )
813+ GinkgoWriter .Printf (" │ %s %s BENCHMARK RESULTS\n " , autoscalerType , strings . ToUpper ( scenario . Name ) )
810814 GinkgoWriter .Printf (" │ Model: %s\n " , benchCfg .ModelID )
811815 GinkgoWriter .Printf (" ├────────────────────────────────────────────────────────────\n " )
812816 GinkgoWriter .Printf (" │ Duration: %.0fs\n " , loadDuration )
@@ -841,7 +845,7 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Ordered, Label("benchmark",
841845 _ = os .WriteFile (prefillResultsFile , data , 0644 )
842846 }
843847
844- Context ("WVA" , func () {
848+ Context ("WVA Prefill Heavy" , Label ( "phase3a" ) , func () {
845849 It ("should run the prefill heavy workload against WVA" , func () {
846850 cleanupAutoscalers ()
847851 res .DeploymentName = findInfraDecodeDeployment ()
@@ -872,7 +876,42 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Ordered, Label("benchmark",
872876
873877 waitForVAAndMetrics ()
874878
875- runPrefillBenchmark ("WVA" )
879+ runBenchmarkScenario ("WVA" , "prefill_heavy" )
880+ })
881+ })
882+
883+ Context ("WVA Decode Heavy" , Label ("decode-heavy" ), func () {
884+ It ("should run the decode heavy workload against WVA" , func () {
885+ cleanupAutoscalers ()
886+ res .DeploymentName = findInfraDecodeDeployment ()
887+ ensureInfraDeploymentReady ()
888+
889+ By ("Creating VariantAutoscaling resource (max=10, cost=10)" )
890+ err := fixtures .EnsureVariantAutoscaling (
891+ ctx , crClient , benchCfg .LLMDNamespace , res .VAName , res .DeploymentName ,
892+ benchCfg .ModelID , benchCfg .AcceleratorType , 10.0 , benchCfg .ControllerInstance ,
893+ fixtures .WithMinReplicas (1 ),
894+ fixtures .WithMaxReplicas (10 ),
895+ )
896+ Expect (err ).NotTo (HaveOccurred (), "Failed to create VA" )
897+
898+ By ("Creating HPA (Scale Up: 0s/Pods/10/150, Scale Down: 240s/Pods/10/150)" )
899+ behavior := & autoscalingv2.HorizontalPodAutoscalerBehavior {
900+ ScaleUp : & autoscalingv2.HPAScalingRules {
901+ StabilizationWindowSeconds : ptr .To (int32 (0 )),
902+ Policies : []autoscalingv2.HPAScalingPolicy {{Type : autoscalingv2 .PodsScalingPolicy , Value : 10 , PeriodSeconds : 150 }},
903+ },
904+ ScaleDown : & autoscalingv2.HPAScalingRules {
905+ StabilizationWindowSeconds : ptr .To (int32 (240 )),
906+ Policies : []autoscalingv2.HPAScalingPolicy {{Type : autoscalingv2 .PodsScalingPolicy , Value : 10 , PeriodSeconds : 150 }},
907+ },
908+ }
909+ err = fixtures .EnsureHPA (ctx , k8sClient , benchCfg .LLMDNamespace , res .HPAName , res .DeploymentName , res .VAName , 1 , 10 , WithBehavior (behavior ))
910+ Expect (err ).NotTo (HaveOccurred (), "Failed to create HPA" )
911+
912+ waitForVAAndMetrics ()
913+
914+ runBenchmarkScenario ("WVA" , "decode_heavy" )
876915 })
877916 })
878917
0 commit comments