fix(controller): reject conflicting engine args at admission

surajssd · surajssd · commit dd71f5978eab · 2026-06-16T14:31:54.000-07:00
A launch flag set in both `spec.engine.args` and `spec.engine.extraArgs`
was only caught by the vLLM transform at reconcile time, so `kubectl
apply/patch` was admitted and the `ModelDeployment` flipped to `Failed`
asynchronously instead of the apply failing outright. The same latent
conflict also affected `llm-d`, which likewise appends `extraArgs`
verbatim after `engine.args`.

- Add a provider-agnostic `ValidateEngineArgs()` to
  `ModelDeploymentSpec` (with an `extraArgFlagKey` helper) that rejects
  a flag key present in both `engine.args` and `engine.extraArgs`; flags
  that legitimately repeat live only in `extraArgs` and are untouched.
- Call it from the validating webhook so the conflict fails the
  apply/patch synchronously (the provider is often auto-selected and
  unknown at admission, so the check is provider-agnostic).
- Delegate the vLLM transform to the shared method as a reconcile-time
  backstop, replacing the bespoke `validateDuplicateVLLMArgKeys`.
- Add unit tests for `ValidateEngineArgs` and webhook tests covering
  create, update, and the disjoint (no-conflict) case.

Signed-off-by: Suraj Deshmukh <suraj.deshmukh@microsoft.com>
diff --git a/controller/api/v1alpha1/modeldeployment_validation.go b/controller/api/v1alpha1/modeldeployment_validation.go
@@ -1,6 +1,9 @@
 package v1alpha1
 
-import "fmt"
+import (
+	"fmt"
+	"strings"
+)
 
 // ValidateImageFields verifies the legacy image override and the engine image
 // override do not request different container images.
@@ -27,3 +30,61 @@ func (spec *ModelDeploymentSpec) ImageOverride() string {
 	}
 	return spec.Image
 }
+
+// ValidateEngineArgs verifies that a launch flag is not set in BOTH
+// spec.engine.args (the structured map) and spec.engine.extraArgs (raw tokens).
+//
+// engine.args is a map, so a key can appear there at most once; finding the same
+// key again in extraArgs is an unambiguous contradiction. Providers that consume
+// both fields (Direct vLLM, llm-d) render engine.args first and then append
+// extraArgs verbatim, so such a collision would emit two conflicting copies of
+// the flag (e.g. "--tensor-parallel-size 4 … --tensor-parallel-size=2"). Engines
+// like vLLM parse last-wins, so the extraArgs value would silently defeat the
+// engine.args one. Reject the contradiction instead of guessing a winner; the
+// user sets the flag in exactly one place.
+//
+// This is provider-agnostic on purpose: it runs at admission for every
+// ModelDeployment (the provider is frequently auto-selected and unknown at
+// admission time) and is re-checked by the relevant provider transforms as a
+// reconcile-time backstop. Flags that legitimately repeat live only in extraArgs
+// and are untouched by this check.
+func (spec *ModelDeploymentSpec) ValidateEngineArgs() error {
+	if len(spec.Engine.Args) == 0 || len(spec.Engine.ExtraArgs) == 0 {
+		return nil
+	}
+	for _, arg := range spec.Engine.ExtraArgs {
+		key, ok := extraArgFlagKey(arg)
+		if !ok {
+			continue
+		}
+		if _, dup := spec.Engine.Args[key]; dup {
+			return fmt.Errorf(
+				"launch flag %q is set in both spec.engine.args and spec.engine.extraArgs (%q); set it in exactly one place so the engine does not receive conflicting values",
+				key, arg,
+			)
+		}
+	}
+	return nil
+}
+
+// extraArgFlagKey extracts the bare flag name from a raw extraArgs token,
+// stripping the leading "--" and any "=value" suffix. It returns ok=false for
+// tokens that are not "--flag" style (bare values, single-dash tokens, "--"),
+// which therefore cannot collide with a structured engine.args key.
+func extraArgFlagKey(arg string) (string, bool) {
+	if !strings.HasPrefix(arg, "--") || len(arg) <= 2 {
+		return "", false
+	}
+	body := strings.TrimPrefix(arg, "--")
+	if strings.HasPrefix(body, "-") {
+		return "", false
+	}
+	if equalIndex := strings.Index(body, "="); equalIndex >= 0 {
+		body = body[:equalIndex]
+	}
+	body = strings.TrimSpace(body)
+	if body == "" {
+		return "", false
+	}
+	return body, true
+}
diff --git a/controller/api/v1alpha1/modeldeployment_validation_test.go b/controller/api/v1alpha1/modeldeployment_validation_test.go
@@ -0,0 +1,97 @@
+/*
+Copyright 2026.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestValidateEngineArgs(t *testing.T) {
+	for _, tc := range []struct {
+		name      string
+		args      map[string]string
+		extraArgs []string
+		wantErr   bool
+	}{
+		{
+			name: "no overlap is allowed",
+			args: map[string]string{"gpu-memory-utilization": "0.9"},
+			extraArgs: []string{
+				"--enable-chunked-prefill",
+				"--max-num-seqs=64",
+			},
+			wantErr: false,
+		},
+		{
+			name:      "same key inline-value form in both is rejected",
+			args:      map[string]string{"tensor-parallel-size": "4"},
+			extraArgs: []string{"--tensor-parallel-size=2"},
+			wantErr:   true,
+		},
+		{
+			name:      "same key two-token form in both is rejected",
+			args:      map[string]string{"tensor-parallel-size": "4"},
+			extraArgs: []string{"--tensor-parallel-size", "2"},
+			wantErr:   true,
+		},
+		{
+			name:      "extraArgs-only flag is allowed (no conflict)",
+			extraArgs: []string{"--tensor-parallel-size=2"},
+			wantErr:   false,
+		},
+		{
+			name:    "args-only flag is allowed (no conflict)",
+			args:    map[string]string{"tensor-parallel-size": "4"},
+			wantErr: false,
+		},
+		{
+			name:      "bare positional token never collides",
+			args:      map[string]string{"tensor-parallel-size": "4"},
+			extraArgs: []string{"tensor-parallel-size"},
+			wantErr:   false,
+		},
+		{
+			name:    "empty inputs are allowed",
+			wantErr: false,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			spec := &ModelDeploymentSpec{
+				Engine: EngineSpec{
+					Args:      tc.args,
+					ExtraArgs: tc.extraArgs,
+				},
+			}
+			err := spec.ValidateEngineArgs()
+			if tc.wantErr {
+				if err == nil {
+					t.Fatalf("expected an error, got nil")
+				}
+				for _, want := range []string{"spec.engine.args", "spec.engine.extraArgs"} {
+					if !strings.Contains(err.Error(), want) {
+						t.Errorf("error %q should mention %q", err.Error(), want)
+					}
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+		})
+	}
+}
diff --git a/controller/internal/webhook/v1alpha1/modeldeployment_webhook.go b/controller/internal/webhook/v1alpha1/modeldeployment_webhook.go
@@ -271,6 +271,19 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
 		))
 	}
 
+	// Reject a launch flag set in both spec.engine.args and spec.engine.extraArgs
+	// at admission, so the conflict fails the apply/patch synchronously instead of
+	// being admitted and then surfacing asynchronously as a Failed reconcile. The
+	// provider transforms re-check this as a backstop. Provider-agnostic: the
+	// provider is frequently auto-selected and unknown at admission time.
+	if err := spec.ValidateEngineArgs(); err != nil {
+		allErrs = append(allErrs, field.Invalid(
+			specPath.Child("engine", "extraArgs"),
+			spec.Engine.ExtraArgs,
+			err.Error(),
+		))
+	}
+
 	// Validate model.id is required for huggingface source
 	if spec.Model.Source == airunwayv1alpha1.ModelSourceHuggingFace || spec.Model.Source == "" {
 		if spec.Model.ID == "" {
diff --git a/controller/internal/webhook/v1alpha1/modeldeployment_webhook_test.go b/controller/internal/webhook/v1alpha1/modeldeployment_webhook_test.go
@@ -370,6 +370,39 @@ var _ = Describe("ModelDeployment Webhook", func() {
 			Expect(warnings).To(BeEmpty())
 		})
 
+		It("Should reject a flag set in both engine.args and engine.extraArgs on create", func() {
+			obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
+			obj.Spec.Engine.Args = map[string]string{"tensor-parallel-size": "4"}
+			obj.Spec.Engine.ExtraArgs = []string{"--tensor-parallel-size=2"}
+
+			_, err := validator.ValidateCreate(ctx, obj)
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("tensor-parallel-size"))
+			Expect(err.Error()).To(ContainSubstring("spec.engine.args"))
+			Expect(err.Error()).To(ContainSubstring("spec.engine.extraArgs"))
+		})
+
+		It("Should reject a flag set in both engine.args and engine.extraArgs on update", func() {
+			oldObj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
+
+			obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
+			obj.Spec.Engine.Args = map[string]string{"tensor-parallel-size": "4"}
+			obj.Spec.Engine.ExtraArgs = []string{"--tensor-parallel-size", "2"}
+
+			_, err := validator.ValidateUpdate(ctx, oldObj, obj)
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("tensor-parallel-size"))
+		})
+
+		It("Should admit disjoint engine.args and engine.extraArgs", func() {
+			obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
+			obj.Spec.Engine.Args = map[string]string{"gpu-memory-utilization": "0.9"}
+			obj.Spec.Engine.ExtraArgs = []string{"--enable-chunked-prefill", "--max-num-seqs=64"}
+
+			_, err := validator.ValidateCreate(ctx, obj)
+			Expect(err).NotTo(HaveOccurred())
+		})
+
 		It("Should admit a single modelCache volume", func() {
 			obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
 			obj.Spec.Model.Storage = &airunwayv1alpha1.StorageSpec{
diff --git a/providers/vllm/transformer.go b/providers/vllm/transformer.go
@@ -397,7 +397,11 @@ func (t *Transformer) buildVLLMArgs(md *airunwayv1alpha1.ModelDeployment, kvTran
 		return nil, err
 	}
 
-	if err := validateDuplicateVLLMArgKeys(md.Spec.Engine.Args, md.Spec.Engine.ExtraArgs); err != nil {
+	// Reconcile-time backstop for the admission-time check: a launch flag must
+	// not be set in both spec.engine.args and spec.engine.extraArgs (the webhook
+	// rejects this synchronously; we re-check here so a transform invoked outside
+	// the webhook path still refuses to render conflicting duplicates).
+	if err := md.Spec.ValidateEngineArgs(); err != nil {
 		return nil, err
 	}
 
@@ -513,33 +517,6 @@ func validateReservedVLLMServerArgs(engineArgs map[string]string, extraArgs []st
 	return nil
 }
 
-// validateDuplicateVLLMArgKeys rejects a launch flag that is set in BOTH
-// spec.engine.args (the structured map) and spec.engine.extraArgs (raw tokens).
-// engine.args is a map, so a key can appear there at most once; finding the same
-// key again in extraArgs is an unambiguous contradiction. We render engine.args
-// first and then append extraArgs verbatim, so without this guard the rendered
-// command would carry two conflicting copies of the flag (e.g.
-// "--tensor-parallel-size 4 … --tensor-parallel-size=2"). vLLM's argparse is
-// last-wins, so it would silently honor the extraArgs value and defeat the
-// engine.args one. Surface the conflict as a clear error instead of guessing a
-// winner; the user removes one of the two settings. Flags that legitimately
-// repeat live only in extraArgs and are untouched by this check.
-func validateDuplicateVLLMArgKeys(engineArgs map[string]string, extraArgs []string) error {
-	if len(engineArgs) == 0 {
-		return nil
-	}
-	for _, arg := range extraArgs {
-		key, ok := extraArgKey(arg)
-		if !ok {
-			continue
-		}
-		if _, dup := engineArgs[key]; dup {
-			return fmt.Errorf("launch flag %q is set in both spec.engine.args and spec.engine.extraArgs (%q); set it in exactly one place so vLLM does not receive conflicting values", key, arg)
-		}
-	}
-	return nil
-}
-
 func isReservedVLLMServerArg(key string) bool {
 	// Normalize the key the same way for both the engineArgs map form ("port")
 	// and a user that writes the flag form as a map key ("--port") or with an