Skip to content

Commit 98ee08a

Browse files
committed
fix: use --config-text for EPP config, remove conflicting env var
The deploy script sets the ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER env var on the EPP. Using --config-file with featureGates: [flowControl] alongside that env var caused a conflict on v0.5.0-rc.1 that broke Gateway routing (HTTP 500).

New approach:
- Use --config-text to pass the EndpointPickerConfig inline (no volume mount)
- Remove the deprecated ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER env var, since the config-text featureGates supersede it
- Wait for Gateway health after the EPP rollout (5-minute timeout)
- Gateway is now a hard requirement (no fallback to direct vLLM)
- Scorer weights: queue=2, kv-cache=2, prefix-cache=3

Made-with: Cursor
1 parent c517181 commit 98ee08a

2 files changed

Lines changed: 46 additions & 150 deletions

File tree

test/benchmark/prefill_heavy_benchmark_test.go

Lines changed: 19 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
. "github.com/onsi/gomega"
1313
"github.com/prometheus/common/model"
1414
autoscalingv2 "k8s.io/api/autoscaling/v2"
15-
corev1 "k8s.io/api/core/v1"
1615
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1716
"k8s.io/utils/ptr"
1817
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -227,99 +226,41 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
227226
GinkgoWriter.Println("--- End Diagnostics ---")
228227
}
229228

230-
// ensureEPPConfig is available but NOT called during benchmarks.
231-
// The deploy script already enables flow control via env var. Patching
232-
// the EPP with --config-file on v0.5.0-rc.1 causes Gateway routing to
233-
// break (HTTP 500). Uncomment the call in runPrefillBenchmark when the
234-
// EPP image supports --config-file alongside the env var.
235-
_ = func() {
236-
const configMapName = "benchmark-epp-config"
237-
238-
By("Creating EndpointPickerConfig ConfigMap")
239-
err := fixtures.EnsureEndpointPickerConfig(ctx, crClient, benchCfg.LLMDNamespace, configMapName)
240-
Expect(err).NotTo(HaveOccurred(), "Failed to create EndpointPickerConfig ConfigMap")
241-
229+
// ensureEPPConfig patches the EPP with --config-text containing the
230+
// EndpointPickerConfig (flowControl + scorer weights 2/2/3). It also
231+
// removes the deprecated ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER env
232+
// var to avoid conflicts — the config-text featureGates supersede it.
233+
ensureEPPConfig := func() {
242234
By("Discovering EPP deployment")
243235
eppDeployName, findErr := fixtures.FindEPPDeployment(ctx, k8sClient, benchCfg.LLMDNamespace)
244236
Expect(findErr).NotTo(HaveOccurred(), "Failed to find EPP deployment")
245237
GinkgoWriter.Printf(" Found EPP deployment: %s\n", eppDeployName)
246238

247-
By("Patching EPP deployment with --config-file volume mount")
248-
patchErr := fixtures.PatchEPPWithConfigFile(ctx, k8sClient, benchCfg.LLMDNamespace, eppDeployName, configMapName)
249-
Expect(patchErr).NotTo(HaveOccurred(), "Failed to patch EPP deployment with config-file")
239+
By("Patching EPP deployment with --config-text (scorer weights 2/2/3, flowControl)")
240+
patchErr := fixtures.PatchEPPWithConfigText(ctx, k8sClient, benchCfg.LLMDNamespace, eppDeployName)
241+
Expect(patchErr).NotTo(HaveOccurred(), "Failed to patch EPP deployment with config-text")
250242
GinkgoWriter.Println(" EPP deployment patched and rolled out successfully")
251-
}
252-
253-
// ensureDirectModelService creates a ClusterIP service that targets the
254-
// Helm-deployed model server pods directly on port 8000, bypassing the Gateway/EPP.
255-
ensureDirectModelService := func() string {
256-
svcName := "prefill-direct-vllm"
257-
By("Ensuring direct model server service for Gateway bypass")
258243

259-
deployment, dErr := k8sClient.AppsV1().Deployments(benchCfg.LLMDNamespace).Get(ctx, res.DeploymentName, metav1.GetOptions{})
260-
Expect(dErr).NotTo(HaveOccurred(), "Failed to get deployment for label discovery")
261-
selector := deployment.Spec.Selector.MatchLabels
262-
GinkgoWriter.Printf(" Using selector from deployment: %v\n", selector)
263-
264-
_ = k8sClient.CoreV1().Services(benchCfg.LLMDNamespace).Delete(ctx, svcName, metav1.DeleteOptions{})
265-
time.Sleep(time.Second)
266-
267-
svc := &corev1.Service{
268-
ObjectMeta: metav1.ObjectMeta{
269-
Name: svcName,
270-
Namespace: benchCfg.LLMDNamespace,
271-
Labels: map[string]string{"test-resource": "true"},
272-
},
273-
Spec: corev1.ServiceSpec{
274-
Type: corev1.ServiceTypeClusterIP,
275-
Selector: selector,
276-
Ports: []corev1.ServicePort{{
277-
Name: "http",
278-
Port: 8000,
279-
Protocol: corev1.ProtocolTCP,
280-
}},
281-
},
282-
}
283-
_, createErr := k8sClient.CoreV1().Services(benchCfg.LLMDNamespace).Create(ctx, svc, metav1.CreateOptions{})
284-
Expect(createErr).NotTo(HaveOccurred(), "Failed to create direct model server service")
285-
286-
directURL := fmt.Sprintf("http://%s.%s.svc.cluster.local:8000", svcName, benchCfg.LLMDNamespace)
287-
GinkgoWriter.Printf(" Direct model server URL: %s\n", directURL)
288-
return directURL
244+
By("Waiting for Gateway to become healthy after EPP rollout")
245+
Eventually(func(g Gomega) {
246+
gwURL := fmt.Sprintf("http://%s.%s.svc.cluster.local:%d",
247+
benchCfg.GatewayServiceName, benchCfg.LLMDNamespace, benchCfg.GatewayServicePort)
248+
err := fixtures.VerifyGatewayConnectivity(ctx, k8sClient, benchCfg.LLMDNamespace, gwURL, benchCfg.ModelID)
249+
g.Expect(err).NotTo(HaveOccurred(), "Gateway not ready yet after EPP rollout")
250+
}, 5*time.Minute, 15*time.Second).Should(Succeed(), "Gateway failed to become healthy after EPP config patch")
251+
GinkgoWriter.Println(" Gateway is healthy after EPP config update")
289252
}
290253

291254
runPrefillBenchmark := func(autoscalerType string) {
292255
ensureInfraDeploymentReady()
293-
// NOTE: We intentionally do NOT call ensureEPPConfig() here.
294-
// The deploy script (install.sh) already enables flow control via
295-
// ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER=true env var on the EPP.
296-
// Patching the EPP with --config-file causes it to restart and break
297-
// Gateway routing (HTTP 500). The scorer weights use defaults.
256+
ensureEPPConfig()
298257
dumpInfrastructureDiagnostics()
299258

300259
gatewayURL := fmt.Sprintf("http://%s.%s.svc.cluster.local:%d",
301260
benchCfg.GatewayServiceName, benchCfg.LLMDNamespace, benchCfg.GatewayServicePort)
302261

303-
By("Verifying Gateway connectivity (prefer Gateway for EPP queue metrics)")
304-
gwErr := fixtures.VerifyGatewayConnectivity(ctx, k8sClient, benchCfg.LLMDNamespace, gatewayURL, benchCfg.ModelID)
305-
306-
var targetURL string
307-
if gwErr != nil {
308-
GinkgoWriter.Printf("WARNING: Gateway connectivity check failed: %v\n", gwErr)
309-
GinkgoWriter.Println("WARNING: Falling back to direct model server connection (bypassing Gateway/EPP)")
310-
GinkgoWriter.Println("WARNING: EPP queue depth metrics will show 0 since traffic does not flow through EPP")
311-
targetURL = ensureDirectModelService()
312-
313-
By("Verifying direct model server connectivity (with retries)")
314-
Eventually(func(g Gomega) {
315-
directErr := fixtures.VerifyGatewayConnectivity(ctx, k8sClient, benchCfg.LLMDNamespace, targetURL, benchCfg.ModelID)
316-
g.Expect(directErr).NotTo(HaveOccurred(), "Direct model server not yet reachable")
317-
}, 3*time.Minute, 20*time.Second).Should(Succeed(), "Direct model server connectivity check failed after retries — backend is truly unreachable")
318-
} else {
319-
GinkgoWriter.Println("Gateway connectivity check passed — using Gateway URL (EPP queue metrics will be captured)")
320-
targetURL = gatewayURL
321-
}
322-
GinkgoWriter.Printf(" Using target URL: %s\n", targetURL)
262+
targetURL := gatewayURL
263+
GinkgoWriter.Printf(" Using Gateway URL (traffic flows through EPP): %s\n", targetURL)
323264

324265
By("Checking Prometheus metric availability before load")
325266
for _, q := range []string{

test/e2e/fixtures/epp_config_builder.go

Lines changed: 27 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,11 @@ import (
99
corev1 "k8s.io/api/core/v1"
1010
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1111
"k8s.io/client-go/kubernetes"
12-
"sigs.k8s.io/controller-runtime/pkg/client"
1312
)
1413

15-
const eppConfigKey = "config.yaml"
16-
17-
// EnsureEndpointPickerConfig creates or updates the EndpointPickerConfig for the benchmark
18-
func EnsureEndpointPickerConfig(ctx context.Context, crClient client.Client, namespace, name string) error {
19-
eppConfig := &corev1.ConfigMap{
20-
ObjectMeta: metav1.ObjectMeta{
21-
Name: name,
22-
Namespace: namespace,
23-
},
24-
Data: map[string]string{
25-
eppConfigKey: `apiVersion: inference.networking.x-k8s.io/v1alpha1
14+
// EndpointPickerConfigYAML is the full config text passed via --config-text.
15+
// It enables flowControl and sets scorer weights: queue=2, kv-cache=2, prefix-cache=3.
16+
const EndpointPickerConfigYAML = `apiVersion: inference.networking.x-k8s.io/v1alpha1
2617
kind: EndpointPickerConfig
2718
featureGates:
2819
- flowControl
@@ -38,81 +29,45 @@ schedulingProfiles:
3829
- pluginRef: kv-cache-utilization-scorer
3930
weight: 2
4031
- pluginRef: prefix-cache-scorer
41-
weight: 3`,
42-
},
43-
}
44-
45-
existing := &corev1.ConfigMap{}
46-
err := crClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, existing)
47-
if err == nil {
48-
eppConfig.SetResourceVersion(existing.GetResourceVersion())
49-
if err := crClient.Update(ctx, eppConfig); err != nil {
50-
return fmt.Errorf("failed to update EndpointPickerConfig ConfigMap: %w", err)
51-
}
52-
return nil
53-
}
54-
55-
if err := crClient.Create(ctx, eppConfig); err != nil {
56-
return fmt.Errorf("failed to create EndpointPickerConfig ConfigMap: %w", err)
57-
}
58-
59-
return nil
60-
}
61-
62-
// PatchEPPWithConfigFile patches the EPP deployment to mount the EndpointPickerConfig
63-
// ConfigMap and pass --config-file to the container. It reads the existing container
64-
// args and appends the flag so that all original flags (--pool-name, --grpc-port, etc.)
65-
// are preserved. It then waits for the rollout to complete.
66-
func PatchEPPWithConfigFile(ctx context.Context, k8sClient *kubernetes.Clientset, namespace, eppDeploymentName, configMapName string) error {
32+
weight: 3`
33+
34+
// PatchEPPWithConfigText patches the EPP deployment to use --config-text with
35+
// the EndpointPickerConfig YAML inline. It also removes the deprecated
36+
// ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER env var to avoid conflicts (the
37+
// config-text featureGates supersede it). This approach avoids ConfigMap
38+
// volume mounts which simplifies the patch. Waits for the rollout to complete.
39+
func PatchEPPWithConfigText(ctx context.Context, k8sClient *kubernetes.Clientset, namespace, eppDeploymentName string) error {
6740
dep, err := k8sClient.AppsV1().Deployments(namespace).Get(ctx, eppDeploymentName, metav1.GetOptions{})
6841
if err != nil {
6942
return fmt.Errorf("failed to get EPP deployment %s: %w", eppDeploymentName, err)
7043
}
7144

72-
const volumeName = "epp-config"
73-
const mountPath = "/etc/epp"
74-
configFilePath := mountPath + "/" + eppConfigKey
75-
configFileArg := "--config-file=" + configFilePath
45+
c := &dep.Spec.Template.Spec.Containers[0]
7646

77-
// Check if already patched (volume already exists)
78-
for _, v := range dep.Spec.Template.Spec.Volumes {
79-
if v.Name == volumeName {
47+
// Check if already patched
48+
for _, a := range c.Args {
49+
if strings.HasPrefix(a, "--config-text=") {
8050
return nil
8151
}
8252
}
8353

84-
// Add volume
85-
dep.Spec.Template.Spec.Volumes = append(dep.Spec.Template.Spec.Volumes, corev1.Volume{
86-
Name: volumeName,
87-
VolumeSource: corev1.VolumeSource{
88-
ConfigMap: &corev1.ConfigMapVolumeSource{
89-
LocalObjectReference: corev1.LocalObjectReference{Name: configMapName},
90-
},
91-
},
92-
})
93-
94-
// Add volumeMount and --config-file arg to the first container, preserving existing args
95-
c := &dep.Spec.Template.Spec.Containers[0]
96-
c.VolumeMounts = append(c.VolumeMounts, corev1.VolumeMount{
97-
Name: volumeName,
98-
MountPath: mountPath,
99-
ReadOnly: true,
100-
})
101-
102-
hasArg := false
103-
for _, a := range c.Args {
104-
if strings.HasPrefix(a, "--config-file=") {
105-
hasArg = true
106-
break
54+
// Remove the ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER env var — the config
55+
// text's featureGates: [flowControl] supersedes it and having both causes
56+
// the EPP to malfunction on v0.5.0-rc.1.
57+
filtered := make([]corev1.EnvVar, 0, len(c.Env))
58+
for _, e := range c.Env {
59+
if e.Name != "ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER" {
60+
filtered = append(filtered, e)
10761
}
10862
}
109-
if !hasArg {
110-
c.Args = append(c.Args, configFileArg)
111-
}
63+
c.Env = filtered
64+
65+
// Append --config-text with inline YAML (preserves all existing args)
66+
c.Args = append(c.Args, "--config-text="+EndpointPickerConfigYAML)
11267

11368
_, err = k8sClient.AppsV1().Deployments(namespace).Update(ctx, dep, metav1.UpdateOptions{})
11469
if err != nil {
115-
return fmt.Errorf("failed to update EPP deployment with config-file: %w", err)
70+
return fmt.Errorf("failed to update EPP deployment with config-text: %w", err)
11671
}
11772

11873
// Wait for rollout: new pods ready

0 commit comments

Comments
 (0)