Skip to content

Commit dd71f59

Browse files
committed
fix(controller): reject conflicting engine args at admission
A launch flag set in both `spec.engine.args` and `spec.engine.extraArgs` was only caught by the vLLM transform at reconcile time, so `kubectl apply/patch` was admitted and the `ModelDeployment` flipped to `Failed` asynchronously instead of the apply failing outright. The same latent conflict also affected `llm-d`, which likewise appends `extraArgs` verbatim after `engine.args`. - Add a provider-agnostic `ValidateEngineArgs()` to `ModelDeploymentSpec` (with an `extraArgFlagKey` helper) that rejects a flag key present in both `engine.args` and `engine.extraArgs`; flags that legitimately repeat live only in `extraArgs` and are untouched. - Call it from the validating webhook so the conflict fails the apply/patch synchronously (the provider is often auto-selected and unknown at admission, so the check is provider-agnostic). - Delegate the vLLM transform to the shared method as a reconcile-time backstop, replacing the bespoke `validateDuplicateVLLMArgKeys`. - Add unit tests for `ValidateEngineArgs` and webhook tests covering create, update, and the disjoint (no-conflict) case. Signed-off-by: Suraj Deshmukh <suraj.deshmukh@microsoft.com>
1 parent e5e2a0a commit dd71f59

5 files changed

Lines changed: 210 additions & 29 deletions

File tree

controller/api/v1alpha1/modeldeployment_validation.go

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package v1alpha1
22

3-
import "fmt"
3+
import (
4+
"fmt"
5+
"strings"
6+
)
47

58
// ValidateImageFields verifies the legacy image override and the engine image
69
// override do not request different container images.
@@ -27,3 +30,61 @@ func (spec *ModelDeploymentSpec) ImageOverride() string {
2730
}
2831
return spec.Image
2932
}
33+
34+
// ValidateEngineArgs verifies that a launch flag is not set in BOTH
35+
// spec.engine.args (the structured map) and spec.engine.extraArgs (raw tokens).
36+
//
37+
// engine.args is a map, so a key can appear there at most once; finding the same
38+
// key again in extraArgs is an unambiguous contradiction. Providers that consume
39+
// both fields (Direct vLLM, llm-d) render engine.args first and then append
40+
// extraArgs verbatim, so such a collision would emit two conflicting copies of
41+
// the flag (e.g. "--tensor-parallel-size 4 … --tensor-parallel-size=2"). Engines
42+
// like vLLM parse last-wins, so the extraArgs value would silently defeat the
43+
// engine.args one. Reject the contradiction instead of guessing a winner; the
44+
// user sets the flag in exactly one place.
45+
//
46+
// This is provider-agnostic on purpose: it runs at admission for every
47+
// ModelDeployment (the provider is frequently auto-selected and unknown at
48+
// admission time) and is re-checked by the relevant provider transforms as a
49+
// reconcile-time backstop. Flags that legitimately repeat live only in extraArgs
50+
// and are untouched by this check.
51+
func (spec *ModelDeploymentSpec) ValidateEngineArgs() error {
52+
if len(spec.Engine.Args) == 0 || len(spec.Engine.ExtraArgs) == 0 {
53+
return nil
54+
}
55+
for _, arg := range spec.Engine.ExtraArgs {
56+
key, ok := extraArgFlagKey(arg)
57+
if !ok {
58+
continue
59+
}
60+
if _, dup := spec.Engine.Args[key]; dup {
61+
return fmt.Errorf(
62+
"launch flag %q is set in both spec.engine.args and spec.engine.extraArgs (%q); set it in exactly one place so the engine does not receive conflicting values",
63+
key, arg,
64+
)
65+
}
66+
}
67+
return nil
68+
}
69+
70+
// extraArgFlagKey extracts the bare flag name from a raw extraArgs token,
71+
// stripping the leading "--" and any "=value" suffix. It returns ok=false for
72+
// tokens that are not "--flag" style (bare values, single-dash tokens, "--"),
73+
// which therefore cannot collide with a structured engine.args key.
74+
func extraArgFlagKey(arg string) (string, bool) {
75+
if !strings.HasPrefix(arg, "--") || len(arg) <= 2 {
76+
return "", false
77+
}
78+
body := strings.TrimPrefix(arg, "--")
79+
if strings.HasPrefix(body, "-") {
80+
return "", false
81+
}
82+
if equalIndex := strings.Index(body, "="); equalIndex >= 0 {
83+
body = body[:equalIndex]
84+
}
85+
body = strings.TrimSpace(body)
86+
if body == "" {
87+
return "", false
88+
}
89+
return body, true
90+
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
Copyright 2026.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
"strings"
21+
"testing"
22+
)
23+
24+
func TestValidateEngineArgs(t *testing.T) {
25+
for _, tc := range []struct {
26+
name string
27+
args map[string]string
28+
extraArgs []string
29+
wantErr bool
30+
}{
31+
{
32+
name: "no overlap is allowed",
33+
args: map[string]string{"gpu-memory-utilization": "0.9"},
34+
extraArgs: []string{
35+
"--enable-chunked-prefill",
36+
"--max-num-seqs=64",
37+
},
38+
wantErr: false,
39+
},
40+
{
41+
name: "same key inline-value form in both is rejected",
42+
args: map[string]string{"tensor-parallel-size": "4"},
43+
extraArgs: []string{"--tensor-parallel-size=2"},
44+
wantErr: true,
45+
},
46+
{
47+
name: "same key two-token form in both is rejected",
48+
args: map[string]string{"tensor-parallel-size": "4"},
49+
extraArgs: []string{"--tensor-parallel-size", "2"},
50+
wantErr: true,
51+
},
52+
{
53+
name: "extraArgs-only flag is allowed (no conflict)",
54+
extraArgs: []string{"--tensor-parallel-size=2"},
55+
wantErr: false,
56+
},
57+
{
58+
name: "args-only flag is allowed (no conflict)",
59+
args: map[string]string{"tensor-parallel-size": "4"},
60+
wantErr: false,
61+
},
62+
{
63+
name: "bare positional token never collides",
64+
args: map[string]string{"tensor-parallel-size": "4"},
65+
extraArgs: []string{"tensor-parallel-size"},
66+
wantErr: false,
67+
},
68+
{
69+
name: "empty inputs are allowed",
70+
wantErr: false,
71+
},
72+
} {
73+
t.Run(tc.name, func(t *testing.T) {
74+
spec := &ModelDeploymentSpec{
75+
Engine: EngineSpec{
76+
Args: tc.args,
77+
ExtraArgs: tc.extraArgs,
78+
},
79+
}
80+
err := spec.ValidateEngineArgs()
81+
if tc.wantErr {
82+
if err == nil {
83+
t.Fatalf("expected an error, got nil")
84+
}
85+
for _, want := range []string{"spec.engine.args", "spec.engine.extraArgs"} {
86+
if !strings.Contains(err.Error(), want) {
87+
t.Errorf("error %q should mention %q", err.Error(), want)
88+
}
89+
}
90+
return
91+
}
92+
if err != nil {
93+
t.Fatalf("unexpected error: %v", err)
94+
}
95+
})
96+
}
97+
}

controller/internal/webhook/v1alpha1/modeldeployment_webhook.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,19 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
271271
))
272272
}
273273

274+
// Reject a launch flag set in both spec.engine.args and spec.engine.extraArgs
275+
// at admission, so the conflict fails the apply/patch synchronously instead of
276+
// being admitted and then surfacing asynchronously as a Failed reconcile. The
277+
// provider transforms re-check this as a backstop. Provider-agnostic: the
278+
// provider is frequently auto-selected and unknown at admission time.
279+
if err := spec.ValidateEngineArgs(); err != nil {
280+
allErrs = append(allErrs, field.Invalid(
281+
specPath.Child("engine", "extraArgs"),
282+
spec.Engine.ExtraArgs,
283+
err.Error(),
284+
))
285+
}
286+
274287
// Validate model.id is required for huggingface source
275288
if spec.Model.Source == airunwayv1alpha1.ModelSourceHuggingFace || spec.Model.Source == "" {
276289
if spec.Model.ID == "" {

controller/internal/webhook/v1alpha1/modeldeployment_webhook_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,39 @@ var _ = Describe("ModelDeployment Webhook", func() {
370370
Expect(warnings).To(BeEmpty())
371371
})
372372

373+
It("Should reject a flag set in both engine.args and engine.extraArgs on create", func() {
374+
obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
375+
obj.Spec.Engine.Args = map[string]string{"tensor-parallel-size": "4"}
376+
obj.Spec.Engine.ExtraArgs = []string{"--tensor-parallel-size=2"}
377+
378+
_, err := validator.ValidateCreate(ctx, obj)
379+
Expect(err).To(HaveOccurred())
380+
Expect(err.Error()).To(ContainSubstring("tensor-parallel-size"))
381+
Expect(err.Error()).To(ContainSubstring("spec.engine.args"))
382+
Expect(err.Error()).To(ContainSubstring("spec.engine.extraArgs"))
383+
})
384+
385+
It("Should reject a flag set in both engine.args and engine.extraArgs on update", func() {
386+
oldObj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
387+
388+
obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
389+
obj.Spec.Engine.Args = map[string]string{"tensor-parallel-size": "4"}
390+
obj.Spec.Engine.ExtraArgs = []string{"--tensor-parallel-size", "2"}
391+
392+
_, err := validator.ValidateUpdate(ctx, oldObj, obj)
393+
Expect(err).To(HaveOccurred())
394+
Expect(err.Error()).To(ContainSubstring("tensor-parallel-size"))
395+
})
396+
397+
It("Should admit disjoint engine.args and engine.extraArgs", func() {
398+
obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
399+
obj.Spec.Engine.Args = map[string]string{"gpu-memory-utilization": "0.9"}
400+
obj.Spec.Engine.ExtraArgs = []string{"--enable-chunked-prefill", "--max-num-seqs=64"}
401+
402+
_, err := validator.ValidateCreate(ctx, obj)
403+
Expect(err).NotTo(HaveOccurred())
404+
})
405+
373406
It("Should admit a single modelCache volume", func() {
374407
obj.Spec.Model.ID = "meta-llama/Llama-2-7b-chat-hf"
375408
obj.Spec.Model.Storage = &airunwayv1alpha1.StorageSpec{

providers/vllm/transformer.go

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,11 @@ func (t *Transformer) buildVLLMArgs(md *airunwayv1alpha1.ModelDeployment, kvTran
397397
return nil, err
398398
}
399399

400-
if err := validateDuplicateVLLMArgKeys(md.Spec.Engine.Args, md.Spec.Engine.ExtraArgs); err != nil {
400+
// Reconcile-time backstop for the admission-time check: a launch flag must
401+
// not be set in both spec.engine.args and spec.engine.extraArgs (the webhook
402+
// rejects this synchronously; we re-check here so a transform invoked outside
403+
// the webhook path still refuses to render conflicting duplicates).
404+
if err := md.Spec.ValidateEngineArgs(); err != nil {
401405
return nil, err
402406
}
403407

@@ -513,33 +517,6 @@ func validateReservedVLLMServerArgs(engineArgs map[string]string, extraArgs []st
513517
return nil
514518
}
515519

516-
// validateDuplicateVLLMArgKeys rejects a launch flag that is set in BOTH
517-
// spec.engine.args (the structured map) and spec.engine.extraArgs (raw tokens).
518-
// engine.args is a map, so a key can appear there at most once; finding the same
519-
// key again in extraArgs is an unambiguous contradiction. We render engine.args
520-
// first and then append extraArgs verbatim, so without this guard the rendered
521-
// command would carry two conflicting copies of the flag (e.g.
522-
// "--tensor-parallel-size 4 … --tensor-parallel-size=2"). vLLM's argparse is
523-
// last-wins, so it would silently honor the extraArgs value and defeat the
524-
// engine.args one. Surface the conflict as a clear error instead of guessing a
525-
// winner; the user removes one of the two settings. Flags that legitimately
526-
// repeat live only in extraArgs and are untouched by this check.
527-
func validateDuplicateVLLMArgKeys(engineArgs map[string]string, extraArgs []string) error {
528-
if len(engineArgs) == 0 {
529-
return nil
530-
}
531-
for _, arg := range extraArgs {
532-
key, ok := extraArgKey(arg)
533-
if !ok {
534-
continue
535-
}
536-
if _, dup := engineArgs[key]; dup {
537-
return fmt.Errorf("launch flag %q is set in both spec.engine.args and spec.engine.extraArgs (%q); set it in exactly one place so vLLM does not receive conflicting values", key, arg)
538-
}
539-
}
540-
return nil
541-
}
542-
543520
func isReservedVLLMServerArg(key string) bool {
544521
// Normalize the key the same way for both the engineArgs map form ("port")
545522
// and a user that writes the flag form as a map key ("--port") or with an

0 commit comments

Comments
 (0)