@@ -12,7 +12,6 @@ import (
1212 . "github.com/onsi/gomega"
1313 "github.com/prometheus/common/model"
1414 autoscalingv2 "k8s.io/api/autoscaling/v2"
15- corev1 "k8s.io/api/core/v1"
1615 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1716 "k8s.io/utils/ptr"
1817 "sigs.k8s.io/controller-runtime/pkg/client"
@@ -227,99 +226,41 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
227226 GinkgoWriter .Println ("--- End Diagnostics ---" )
228227 }
229228
230- // ensureEPPConfig is available but NOT called during benchmarks.
231- // The deploy script already enables flow control via env var. Patching
232- // the EPP with --config-file on v0.5.0-rc.1 causes Gateway routing to
233- // break (HTTP 500). Uncomment the call in runPrefillBenchmark when the
234- // EPP image supports --config-file alongside the env var.
235- _ = func () {
236- const configMapName = "benchmark-epp-config"
237-
238- By ("Creating EndpointPickerConfig ConfigMap" )
239- err := fixtures .EnsureEndpointPickerConfig (ctx , crClient , benchCfg .LLMDNamespace , configMapName )
240- Expect (err ).NotTo (HaveOccurred (), "Failed to create EndpointPickerConfig ConfigMap" )
241-
229+ // ensureEPPConfig patches the EPP with --config-text (EndpointPickerConfig:
230+ // flowControl + scorer weights 2/2/3) and removes the deprecated
231+ // ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER env var, which the config-text
232+ // featureGates supersede; it then waits for the Gateway to become healthy.
233+ ensureEPPConfig := func () {
242234 By ("Discovering EPP deployment" )
243235 eppDeployName , findErr := fixtures .FindEPPDeployment (ctx , k8sClient , benchCfg .LLMDNamespace )
244236 Expect (findErr ).NotTo (HaveOccurred (), "Failed to find EPP deployment" )
245237 GinkgoWriter .Printf (" Found EPP deployment: %s\n " , eppDeployName )
246238
247- By ("Patching EPP deployment with --config-file volume mount " )
248- patchErr := fixtures .PatchEPPWithConfigFile (ctx , k8sClient , benchCfg .LLMDNamespace , eppDeployName , configMapName )
249- Expect (patchErr ).NotTo (HaveOccurred (), "Failed to patch EPP deployment with config-file " )
239+ By ("Patching EPP deployment with --config-text (scorer weights 2/2/3, flowControl) " )
240+ patchErr := fixtures .PatchEPPWithConfigText (ctx , k8sClient , benchCfg .LLMDNamespace , eppDeployName )
241+ Expect (patchErr ).NotTo (HaveOccurred (), "Failed to patch EPP deployment with config-text " )
250242 GinkgoWriter .Println (" EPP deployment patched and rolled out successfully" )
251- }
252-
253- // ensureDirectModelService creates a ClusterIP service that targets the
254- // Helm-deployed model server pods directly on port 8000, bypassing the Gateway/EPP.
255- ensureDirectModelService := func () string {
256- svcName := "prefill-direct-vllm"
257- By ("Ensuring direct model server service for Gateway bypass" )
258243
259- deployment , dErr := k8sClient .AppsV1 ().Deployments (benchCfg .LLMDNamespace ).Get (ctx , res .DeploymentName , metav1.GetOptions {})
260- Expect (dErr ).NotTo (HaveOccurred (), "Failed to get deployment for label discovery" )
261- selector := deployment .Spec .Selector .MatchLabels
262- GinkgoWriter .Printf (" Using selector from deployment: %v\n " , selector )
263-
264- _ = k8sClient .CoreV1 ().Services (benchCfg .LLMDNamespace ).Delete (ctx , svcName , metav1.DeleteOptions {})
265- time .Sleep (time .Second )
266-
267- svc := & corev1.Service {
268- ObjectMeta : metav1.ObjectMeta {
269- Name : svcName ,
270- Namespace : benchCfg .LLMDNamespace ,
271- Labels : map [string ]string {"test-resource" : "true" },
272- },
273- Spec : corev1.ServiceSpec {
274- Type : corev1 .ServiceTypeClusterIP ,
275- Selector : selector ,
276- Ports : []corev1.ServicePort {{
277- Name : "http" ,
278- Port : 8000 ,
279- Protocol : corev1 .ProtocolTCP ,
280- }},
281- },
282- }
283- _ , createErr := k8sClient .CoreV1 ().Services (benchCfg .LLMDNamespace ).Create (ctx , svc , metav1.CreateOptions {})
284- Expect (createErr ).NotTo (HaveOccurred (), "Failed to create direct model server service" )
285-
286- directURL := fmt .Sprintf ("http://%s.%s.svc.cluster.local:8000" , svcName , benchCfg .LLMDNamespace )
287- GinkgoWriter .Printf (" Direct model server URL: %s\n " , directURL )
288- return directURL
244+ By ("Waiting for Gateway to become healthy after EPP rollout" )
245+ Eventually (func (g Gomega ) {
246+ gwURL := fmt .Sprintf ("http://%s.%s.svc.cluster.local:%d" ,
247+ benchCfg .GatewayServiceName , benchCfg .LLMDNamespace , benchCfg .GatewayServicePort )
248+ err := fixtures .VerifyGatewayConnectivity (ctx , k8sClient , benchCfg .LLMDNamespace , gwURL , benchCfg .ModelID )
249+ g .Expect (err ).NotTo (HaveOccurred (), "Gateway not ready yet after EPP rollout" )
250+ }, 5 * time .Minute , 15 * time .Second ).Should (Succeed (), "Gateway failed to become healthy after EPP config patch" )
251+ GinkgoWriter .Println (" Gateway is healthy after EPP config update" )
289252 }
290253
291254 runPrefillBenchmark := func (autoscalerType string ) {
292255 ensureInfraDeploymentReady ()
293- // NOTE: We intentionally do NOT call ensureEPPConfig() here.
294- // The deploy script (install.sh) already enables flow control via
295- // ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER=true env var on the EPP.
296- // Patching the EPP with --config-file causes it to restart and break
297- // Gateway routing (HTTP 500). The scorer weights use defaults.
256+ ensureEPPConfig ()
298257 dumpInfrastructureDiagnostics ()
299258
300259 gatewayURL := fmt .Sprintf ("http://%s.%s.svc.cluster.local:%d" ,
301260 benchCfg .GatewayServiceName , benchCfg .LLMDNamespace , benchCfg .GatewayServicePort )
302261
303- By ("Verifying Gateway connectivity (prefer Gateway for EPP queue metrics)" )
304- gwErr := fixtures .VerifyGatewayConnectivity (ctx , k8sClient , benchCfg .LLMDNamespace , gatewayURL , benchCfg .ModelID )
305-
306- var targetURL string
307- if gwErr != nil {
308- GinkgoWriter .Printf ("WARNING: Gateway connectivity check failed: %v\n " , gwErr )
309- GinkgoWriter .Println ("WARNING: Falling back to direct model server connection (bypassing Gateway/EPP)" )
310- GinkgoWriter .Println ("WARNING: EPP queue depth metrics will show 0 since traffic does not flow through EPP" )
311- targetURL = ensureDirectModelService ()
312-
313- By ("Verifying direct model server connectivity (with retries)" )
314- Eventually (func (g Gomega ) {
315- directErr := fixtures .VerifyGatewayConnectivity (ctx , k8sClient , benchCfg .LLMDNamespace , targetURL , benchCfg .ModelID )
316- g .Expect (directErr ).NotTo (HaveOccurred (), "Direct model server not yet reachable" )
317- }, 3 * time .Minute , 20 * time .Second ).Should (Succeed (), "Direct model server connectivity check failed after retries — backend is truly unreachable" )
318- } else {
319- GinkgoWriter .Println ("Gateway connectivity check passed — using Gateway URL (EPP queue metrics will be captured)" )
320- targetURL = gatewayURL
321- }
322- GinkgoWriter .Printf (" Using target URL: %s\n " , targetURL )
262+ targetURL := gatewayURL
263+ GinkgoWriter .Printf (" Using Gateway URL (traffic flows through EPP): %s\n " , targetURL )
323264
324265 By ("Checking Prometheus metric availability before load" )
325266 for _ , q := range []string {
0 commit comments