Skip to content

Commit 98583ce

Browse files
committed
fix(dynamo): reject non-vllm mocker at admission and fix review nits
Address Copilot review feedback on the Dynamo mocker E2E PR: - Enforce the vLLM-only constraint for mocker mode at admission time in the validating webhook, so a non-vllm `engine.type` combined with the `airunway.ai/dynamo-test-backend=mocker` annotation is rejected up front instead of being admitted and failing later during provider reconcile. Add a webhook test covering the `sglang`-on-`dynamo` rejection. - Fix stale comments in `buildAggregatedWorker`, `buildPrefillWorker`, and `buildDecodeWorker`: mocker mode does not drop all GPU/CPU requests, it replaces GPU resources with small CPU/memory requests+limits to keep the worker Burstable rather than BestEffort. - Rename the local `yaml` variable in `testCreateMockerModelDeployment` to `manifest` to stop it shadowing the `sigs.k8s.io/yaml` package import. - Drop the `-run TestDynamoMocker` filter from the `test-e2e-mocker` target so `TestInjectMockerAnnotation` also runs in CI; the GPU lane self-skips without `DYNAMO_INSTALLED`. Signed-off-by: Suraj Deshmukh <suraj.deshmukh@microsoft.com>
1 parent 8efb684 commit 98583ce

5 files changed

Lines changed: 40 additions & 11 deletions

File tree

controller/internal/webhook/v1alpha1/modeldeployment_webhook.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,19 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
303303
isDynamoMocker := obj.Annotations["airunway.ai/dynamo-test-backend"] == "mocker" &&
304304
spec.Provider != nil && spec.Provider.Name == "dynamo"
305305

306+
// The Dynamo mocker backend only simulates the vLLM engine. Enforce the
307+
// vLLM-only constraint at admission so a non-vllm engine + mocker annotation
308+
// is rejected here rather than admitted and failing later during provider
309+
// reconciliation (the dynamo provider re-validates this too). An empty engine
310+
// type is allowed — the provider defaults it to vllm.
311+
if isDynamoMocker && spec.Engine.Type != "" && spec.Engine.Type != airunwayv1alpha1.EngineTypeVLLM {
312+
allErrs = append(allErrs, field.Invalid(
313+
specPath.Child("engine", "type"),
314+
spec.Engine.Type,
315+
"the dynamo mocker test backend only supports the vllm engine",
316+
))
317+
}
318+
306319
if !isDynamoMocker && spec.Provider != nil && spec.Provider.Name != "" && spec.Engine.Type != "" && v.Reader != nil {
307320
var providerConfig airunwayv1alpha1.InferenceProviderConfig
308321
err := v.Reader.Get(ctx, client.ObjectKey{Name: spec.Provider.Name}, &providerConfig)

controller/internal/webhook/v1alpha1/modeldeployment_webhook_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,5 +1446,16 @@ var _ = Describe("ModelDeployment Webhook", func() {
14461446
Expect(err).To(HaveOccurred())
14471447
Expect(err.Error()).To(ContainSubstring("gpu.count > 0"))
14481448
})
1449+
1450+
It("Should reject a non-vllm engine on dynamo even with the mocker annotation", func() {
1451+
obj.Annotations = map[string]string{"airunway.ai/dynamo-test-backend": "mocker"}
1452+
obj.Spec.Model.ID = "Qwen/Qwen3-0.6B"
1453+
obj.Spec.Engine.Type = airunwayv1alpha1.EngineTypeSGLang
1454+
obj.Spec.Provider = &airunwayv1alpha1.ProviderSpec{Name: "dynamo"}
1455+
obj.Spec.Resources = &airunwayv1alpha1.ResourceSpec{GPU: &airunwayv1alpha1.GPUSpec{Count: 0}}
1456+
_, err := validator.ValidateCreate(ctx, obj)
1457+
Expect(err).To(HaveOccurred())
1458+
Expect(err.Error()).To(ContainSubstring("only supports the vllm engine"))
1459+
})
14491460
})
14501461
})

providers/dynamo/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,7 @@ test-e2e:
108108

109109
## Run the CPU-only mocker e2e tests (aggregated + disaggregated). Requires a cluster
110110
## with the Dynamo platform (see setup-dynamo-mocker) and the dynamo provider deployed.
111+
## No -run filter: the GPU lane (TestDynamoProviderE2E) self-skips without DYNAMO_INSTALLED,
112+
## so dropping it lets the unit-style TestInjectMockerAnnotation run here too.
111113
test-e2e-mocker:
112-
DYNAMO_MOCKER=true go test -count=1 -tags=e2e -v -timeout 30m ./test/e2e/ -run TestDynamoMocker
114+
DYNAMO_MOCKER=true go test -count=1 -tags=e2e -v -timeout 30m ./test/e2e/

providers/dynamo/test/e2e/dynamo_mocker_e2e_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,8 @@ func testCreateMockerModelDeployment(t *testing.T, tc mockerCase) {
152152
t.Fatalf("failed to read fixture %s: %v", path, err)
153153
}
154154

155-
yaml := injectMockerAnnotation(t, string(raw))
156-
out, err := kubectlApplyLiteral(t, yaml)
155+
manifest := injectMockerAnnotation(t, string(raw))
156+
out, err := kubectlApplyLiteral(t, manifest)
157157
if err != nil {
158158
t.Fatalf("failed to apply mocker ModelDeployment: %v\nOutput: %s", err, out)
159159
}

providers/dynamo/transformer.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -512,8 +512,9 @@ func (t *Transformer) buildAggregatedWorker(md *airunwayv1alpha1.ModelDeployment
512512

513513
command := t.engineCommand(md.ResolvedEngineType())
514514

515-
// Mocker mode: swap the real engine for python3 -m dynamo.mocker and drop
516-
// all GPU/CPU resource requests so the worker schedules on CPU-only nodes.
515+
// Mocker mode: swap the real engine for python3 -m dynamo.mocker and replace
516+
// the GPU resources with small CPU/memory requests+limits (no GPU) so the
517+
// worker schedules on CPU-only nodes while staying Burstable, not BestEffort.
517518
if isMockerMode(md) {
518519
command = mockerCommand()
519520
args = buildMockerArgs(md)
@@ -610,9 +611,10 @@ func (t *Transformer) buildPrefillWorker(md *airunwayv1alpha1.ModelDeployment, i
610611

611612
command := t.engineCommand(md.ResolvedEngineType())
612613

613-
// Mocker mode: swap the real engine for python3 -m dynamo.mocker and drop
614-
// GPU resource requests. The mocker keeps --disaggregation-mode but does
615-
// NOT use --kv-transfer-config (that NIXL flag is real-vLLM-only).
614+
// Mocker mode: swap the real engine for python3 -m dynamo.mocker and replace
615+
// the GPU resources with small CPU/memory requests+limits (no GPU). The mocker
616+
// keeps --disaggregation-mode but does NOT use --kv-transfer-config (that NIXL
617+
// flag is real-vLLM-only).
616618
if isMockerMode(md) {
617619
command = mockerCommand()
618620
args = append(buildMockerArgs(md), "--disaggregation-mode", SubComponentTypePrefill)
@@ -686,9 +688,10 @@ func (t *Transformer) buildDecodeWorker(md *airunwayv1alpha1.ModelDeployment, im
686688

687689
command := t.engineCommand(md.ResolvedEngineType())
688690

689-
// Mocker mode: swap the real engine for python3 -m dynamo.mocker and drop
690-
// GPU resource requests. The mocker keeps --disaggregation-mode but does
691-
// NOT use --kv-transfer-config (that NIXL flag is real-vLLM-only).
691+
// Mocker mode: swap the real engine for python3 -m dynamo.mocker and replace
692+
// the GPU resources with small CPU/memory requests+limits (no GPU). The mocker
693+
// keeps --disaggregation-mode but does NOT use --kv-transfer-config (that NIXL
694+
// flag is real-vLLM-only).
692695
if isMockerMode(md) {
693696
command = mockerCommand()
694697
args = append(buildMockerArgs(md), "--disaggregation-mode", SubComponentTypeDecode)

0 commit comments

Comments
 (0)