Skip to content

Commit 7a1e4e1

Browse files
authored
feat(dynamo): add CPU-only mocker mode for GPU-less E2E coverage (kaito-project#307)
Signed-off-by: Suraj Deshmukh <suraj.deshmukh@microsoft.com>
1 parent 1024da7 commit 7a1e4e1

16 files changed

Lines changed: 1437 additions & 27 deletions
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# End-to-end coverage for the Dynamo provider's CPU-only mocker backend.
# Runs on a Kind cluster, so no GPU runners are required.
name: E2E Dynamo Mocker Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

permissions:
  contents: read

jobs:
  e2e-dynamo-mocker:
    runs-on: ubuntu-latest-16-cores
    timeout-minutes: 45

    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Setup Go
        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version: "1.25"
          cache-dependency-path: providers/dynamo/go.sum

      - name: Setup Bun
        # Required by the Makefile's verify-versions target (TS version-sync check),
        # which setup-dynamo-mocker depends on via the root Makefile.
        uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
        with:
          bun-version: latest

      - name: Setup Kind
        run: |
          go install sigs.k8s.io/kind@latest
          kind create cluster --name airunway-e2e --wait 120s

      - name: Install Dynamo platform (CPU/mocker)
        run: |
          # CPU-only install: no GPU pre-deployment check, no GAIE. The mocker
          # backend runs python3 -m dynamo.mocker and needs no GPUs.
          make -C providers/dynamo setup-dynamo-mocker
          kubectl wait --for=condition=Available deployment -n dynamo-system --all --timeout=300s

      - name: Build and deploy controller
        run: |
          make controller-docker-build CONTROLLER_IMG=airunway-controller:e2e
          kind load docker-image airunway-controller:e2e --name airunway-e2e
          make controller-deploy CONTROLLER_IMG=airunway-controller:e2e
          kubectl wait --for=condition=Available deployment -n airunway-system -l control-plane=controller-manager --timeout=120s

      - name: Build and deploy Dynamo provider
        run: |
          make -C providers/dynamo docker-build IMG=dynamo-provider:e2e
          kind load docker-image dynamo-provider:e2e --name airunway-e2e
          make -C providers/dynamo deploy IMG=dynamo-provider:e2e
          kubectl wait --for=condition=Available deployment -n airunway-system -l control-plane=dynamo-provider --timeout=120s

      - name: Wait for provider registration
        run: |
          kubectl wait --for=jsonpath='{.status.ready}'=true inferenceproviderconfig/dynamo --timeout=120s

      - name: Run mocker E2E (aggregated + disaggregated)
        run: |
          make -C providers/dynamo test-e2e-mocker

      - name: Collect debug info
        if: failure()
        run: |
          # Diagnostics are best-effort. The default shell is `bash -e`, so
          # without this a single failing kubectl call (for example a CRD that
          # was never installed because an earlier step failed) would abort the
          # step and hide every section after it.
          set +e
          echo "=== ModelDeployments ==="
          kubectl get modeldeployments -A -o yaml
          echo "=== DynamoGraphDeployments ==="
          kubectl get dynamographdeployments.nvidia.com -A -o yaml
          echo "=== InferenceProviderConfigs ==="
          kubectl get inferenceproviderconfigs -o yaml
          echo "=== Controller Logs ==="
          kubectl logs -n airunway-system -l control-plane=controller-manager --tail=200
          echo "=== Dynamo Provider Logs ==="
          kubectl logs -n airunway-system -l control-plane=dynamo-provider --tail=200
          echo "=== Dynamo Operator Logs ==="
          # `kubectl logs` needs a pod (or TYPE/NAME) or a label selector, so
          # walk every pod in the namespace explicitly.
          for pod in $(kubectl get pods -n dynamo-system -o name); do
            kubectl logs -n dynamo-system "$pod" --all-containers --tail=200 --prefix
          done
          echo "=== Events ==="
          kubectl get events -A --sort-by=.lastTimestamp
          echo "=== Pods ==="
          kubectl get pods -A

      - name: Cleanup
        if: always()
        run: |
          kind delete cluster --name airunway-e2e

controller/internal/controller/gateway_reconciler.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,18 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
5656
return nil
5757
}
5858

59+
// Skip for the Dynamo mocker test backend. Mocker mode deploys a standalone
60+
// Frontend and intentionally does not create a provider-managed
61+
// InferencePool/EPP, so engaging gateway reconciliation here would wait
62+
// forever on a pool that never appears (NotFound retries / a misleading
63+
// GatewayReady=False status) on any cluster that does have the GAIE CRDs
64+
// installed. This keeps the controller consistent with the dynamo
65+
// transformer, which also forces the non-gateway path in mocker mode.
66+
if isDynamoMockerMode(md) {
67+
logger.V(1).Info("Skipping gateway reconciliation for Dynamo mocker test backend", "name", md.Name)
68+
return nil
69+
}
70+
5971
// Skip if gateway CRDs are not available
6072
if !r.GatewayDetector.IsAvailable(ctx) {
6173
// Warn if user explicitly enabled gateway but CRDs are missing
@@ -851,6 +863,26 @@ func resolvedProviderName(md *airunwayv1alpha1.ModelDeployment) string {
851863
return ""
852864
}
853865

866+
// dynamoMockerAnnotation / dynamoMockerValue select the Dynamo provider's
// internal, test-only mocker backend: a ModelDeployment opts in by carrying
// the annotation dynamoMockerAnnotation with the value dynamoMockerValue.
//
// The key is kept as a literal here (rather than importing providers/dynamo)
// so the controller has no build dependency on an out-of-tree provider
// module — see providers/dynamo/mocker.go
// (AnnotationDynamoTestBackend / DynamoTestBackendMocker). The two copies
// must be kept in sync by hand.
const (
	dynamoMockerAnnotation = "airunway.ai/dynamo-test-backend"
	dynamoMockerValue      = "mocker"
)
875+
876+
// isDynamoMockerMode reports whether the ModelDeployment opts into the Dynamo
877+
// mocker test backend on the dynamo provider. Mocker mode runs the GPU-less
878+
// python3 -m dynamo.mocker behind a standalone Frontend and intentionally does
879+
// not create an InferencePool/EPP, so the controller must skip the GPU-oriented
880+
// validation and gateway/GAIE reconciliation it would otherwise apply.
881+
func isDynamoMockerMode(md *airunwayv1alpha1.ModelDeployment) bool {
882+
return md.Annotations[dynamoMockerAnnotation] == dynamoMockerValue &&
883+
md.Spec.Provider != nil && md.Spec.Provider.Name == "dynamo"
884+
}
885+
854886
// resolveServicePort looks up the first HTTP port on the named service.
855887
func (r *ModelDeploymentReconciler) resolveServicePort(ctx context.Context, serviceName, namespace string) int32 {
856888
var svc corev1.Service

controller/internal/controller/gateway_reconciler_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"testing"
2323

24+
"k8s.io/apimachinery/pkg/api/meta"
2425
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2526
"k8s.io/apimachinery/pkg/runtime"
2627
"k8s.io/apimachinery/pkg/types"
@@ -268,6 +269,41 @@ func TestGateway_HTTPRouteCreation(t *testing.T) {
268269
}
269270
}
270271

272+
func TestGateway_DynamoMockerSkipsCreation(t *testing.T) {
273+
scheme := newTestScheme()
274+
md := newModelDeployment("test-model", "default")
275+
// Gateway is left at its default (enabled) and the GAIE CRDs are available,
276+
// so only the mocker annotation should keep the controller off the gateway
277+
// path. The dynamo standalone-Frontend mocker DGD never creates a
278+
// provider-managed InferencePool, so engaging gateway would loop on NotFound.
279+
md.Spec.Provider = &airunwayv1alpha1.ProviderSpec{Name: "dynamo"}
280+
md.Annotations = map[string]string{"airunway.ai/dynamo-test-backend": "mocker"}
281+
detector := fakeDetector(true, "my-gateway", "gateway-ns")
282+
r := newTestReconciler(scheme, detector, md)
283+
ctx := context.Background()
284+
285+
if err := r.reconcileGateway(ctx, md); err != nil {
286+
t.Fatalf("reconcileGateway failed: %v", err)
287+
}
288+
289+
// No InferencePool should be created.
290+
var pool inferencev1.InferencePool
291+
if err := r.Get(ctx, types.NamespacedName{Name: "test-model", Namespace: "default"}, &pool); err == nil {
292+
t.Error("expected InferencePool to NOT be created in dynamo mocker mode")
293+
}
294+
295+
// No HTTPRoute should be created.
296+
var route gatewayv1.HTTPRoute
297+
if err := r.Get(ctx, types.NamespacedName{Name: "test-model", Namespace: "default"}, &route); err == nil {
298+
t.Error("expected HTTPRoute to NOT be created in dynamo mocker mode")
299+
}
300+
301+
// And no GatewayReady condition should have been set (neither true nor false).
302+
if c := meta.FindStatusCondition(md.Status.Conditions, airunwayv1alpha1.ConditionTypeGatewayReady); c != nil {
303+
t.Errorf("expected no GatewayReady condition in mocker mode, got %q/%q", c.Status, c.Reason)
304+
}
305+
}
306+
271307
func TestGateway_DisabledSkipsCreation(t *testing.T) {
272308
scheme := newTestScheme()
273309
md := newModelDeployment("test-model", "default")

controller/internal/controller/modeldeployment_controller.go

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,17 @@ func (r *ModelDeploymentReconciler) validateSpec(ctx context.Context, md *airunw
368368
return fmt.Errorf("engine.type must be specified or auto-selected from provider capabilities")
369369
}
370370

371+
// Mocker mode escape hatch: a ModelDeployment annotated with
372+
// airunway.ai/dynamo-test-backend=mocker targeting the dynamo provider runs
373+
// the GPU-less python3 -m dynamo.mocker backend, so the GPU compatibility and
374+
// disaggregated gpu.count checks below must not reject it. This mirrors the
375+
// admission webhook (see modeldeployment_webhook.go) so the two cannot drift.
376+
// Mocker is vLLM-only.
377+
isDynamoMocker := isDynamoMockerMode(md)
378+
if isDynamoMocker && engineType != airunwayv1alpha1.EngineTypeVLLM {
379+
return fmt.Errorf("the dynamo mocker test backend only supports the vllm engine")
380+
}
381+
371382
// Validate provider/engine/serving-mode/GPU-CPU compatibility via the
372383
// shared helper so the webhook and reconciler cannot drift.
373384
gpuCount := int32(0)
@@ -385,17 +396,19 @@ func (r *ModelDeploymentReconciler) validateSpec(ctx context.Context, md *airunw
385396
}
386397
}
387398
}
388-
if ces := validation.CheckProviderCompatibility(
389-
providerName,
390-
namedConfig,
391-
providerConfigs,
392-
engineType,
393-
servingMode,
394-
gpuCount,
395-
); len(ces) > 0 {
396-
// Return the first error to preserve the reconciler's existing
397-
// single-error contract.
398-
return fmt.Errorf("%s", ces[0].Message)
399+
if !isDynamoMocker {
400+
if ces := validation.CheckProviderCompatibility(
401+
providerName,
402+
namedConfig,
403+
providerConfigs,
404+
engineType,
405+
servingMode,
406+
gpuCount,
407+
); len(ces) > 0 {
408+
// Return the first error to preserve the reconciler's existing
409+
// single-error contract.
410+
return fmt.Errorf("%s", ces[0].Message)
411+
}
399412
}
400413

401414
// Validate disaggregated mode configuration
@@ -410,14 +423,19 @@ func (r *ModelDeploymentReconciler) validateSpec(ctx context.Context, md *airunw
410423
return fmt.Errorf("disaggregated mode requires scaling.prefill and scaling.decode")
411424
}
412425

413-
// Prefill must have GPU
414-
if spec.Scaling.Prefill.GPU == nil || spec.Scaling.Prefill.GPU.Count == 0 {
415-
return fmt.Errorf("disaggregated mode requires scaling.prefill.gpu.count > 0")
416-
}
426+
// The GPU-less mocker backend waives the per-component gpu.count
427+
// requirement, but the prefill/decode blocks themselves are still
428+
// required (above) so the dynamo transformer can build both workers.
429+
if !isDynamoMocker {
430+
// Prefill must have GPU
431+
if spec.Scaling.Prefill.GPU == nil || spec.Scaling.Prefill.GPU.Count == 0 {
432+
return fmt.Errorf("disaggregated mode requires scaling.prefill.gpu.count > 0")
433+
}
417434

418-
// Decode must have GPU
419-
if spec.Scaling.Decode.GPU == nil || spec.Scaling.Decode.GPU.Count == 0 {
420-
return fmt.Errorf("disaggregated mode requires scaling.decode.gpu.count > 0")
435+
// Decode must have GPU
436+
if spec.Scaling.Decode.GPU == nil || spec.Scaling.Decode.GPU.Count == 0 {
437+
return fmt.Errorf("disaggregated mode requires scaling.decode.gpu.count > 0")
438+
}
421439
}
422440
}
423441

controller/internal/controller/validate_spec_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,73 @@ func TestValidateSpec(t *testing.T) {
126126
},
127127
providerConfigs: allProviders(),
128128
},
129+
{
130+
name: "valid: CPU-only aggregated vllm on dynamo with mocker annotation",
131+
md: airunwayv1alpha1.ModelDeployment{
132+
ObjectMeta: metav1.ObjectMeta{
133+
Annotations: map[string]string{"airunway.ai/dynamo-test-backend": "mocker"},
134+
},
135+
Spec: airunwayv1alpha1.ModelDeploymentSpec{
136+
Model: airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
137+
Engine: airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeVLLM},
138+
Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
139+
// No resources.gpu: the GPU-less mocker backend waives it.
140+
},
141+
},
142+
providerConfigs: allProviders(),
143+
},
144+
{
145+
name: "valid: CPU-only disaggregated vllm on dynamo with mocker annotation",
146+
md: airunwayv1alpha1.ModelDeployment{
147+
ObjectMeta: metav1.ObjectMeta{
148+
Annotations: map[string]string{"airunway.ai/dynamo-test-backend": "mocker"},
149+
},
150+
Spec: airunwayv1alpha1.ModelDeploymentSpec{
151+
Model: airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
152+
Engine: airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeVLLM},
153+
Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
154+
Serving: &airunwayv1alpha1.ServingSpec{Mode: airunwayv1alpha1.ServingModeDisaggregated},
155+
// prefill/decode blocks present but no gpu.count.
156+
Scaling: &airunwayv1alpha1.ScalingSpec{
157+
Prefill: &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
158+
Decode: &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
159+
},
160+
},
161+
},
162+
providerConfigs: allProviders(),
163+
},
164+
{
165+
name: "invalid: non-vllm engine on dynamo even with mocker annotation",
166+
md: airunwayv1alpha1.ModelDeployment{
167+
ObjectMeta: metav1.ObjectMeta{
168+
Annotations: map[string]string{"airunway.ai/dynamo-test-backend": "mocker"},
169+
},
170+
Spec: airunwayv1alpha1.ModelDeploymentSpec{
171+
Model: airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
172+
Engine: airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeSGLang},
173+
Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
174+
},
175+
},
176+
providerConfigs: allProviders(),
177+
wantErr: "only supports the vllm engine",
178+
},
179+
{
180+
name: "invalid: CPU-only disaggregated vllm on dynamo WITHOUT mocker annotation",
181+
md: airunwayv1alpha1.ModelDeployment{
182+
Spec: airunwayv1alpha1.ModelDeploymentSpec{
183+
Model: airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
184+
Engine: airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeVLLM},
185+
Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
186+
Serving: &airunwayv1alpha1.ServingSpec{Mode: airunwayv1alpha1.ServingModeDisaggregated},
187+
Scaling: &airunwayv1alpha1.ScalingSpec{
188+
Prefill: &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
189+
Decode: &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
190+
},
191+
},
192+
},
193+
providerConfigs: allProviders(),
194+
wantErr: "scaling.prefill.gpu.count > 0",
195+
},
129196
{
130197
name: "valid: llamacpp CPU-only on kaito",
131198
md: airunwayv1alpha1.ModelDeployment{

controller/internal/webhook/v1alpha1/modeldeployment_webhook.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,32 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
291291
// admission; falls back to the uncached APIReader only when the cache
292292
// reports NotFound, to absorb the race where a brand-new
293293
// InferenceProviderConfig hasn't yet propagated to informers.
294-
if spec.Provider != nil && spec.Provider.Name != "" && spec.Engine.Type != "" && v.Reader != nil {
294+
//
295+
// Mocker mode escape hatch: a ModelDeployment annotated with
296+
// airunway.ai/dynamo-test-backend=mocker targeting the dynamo provider runs
297+
// the GPU-less python3 -m dynamo.mocker backend, so the provider's GPU
298+
// capability check must not reject it at admission. This is a test-only path
299+
// (the dynamo provider re-validates compatibility during reconciliation).
300+
// The annotation key is kept as a literal here to avoid importing the
301+
// provider module from the controller webhook (see
302+
// providers/dynamo/mocker.go AnnotationDynamoTestBackend / DynamoTestBackendMocker).
303+
isDynamoMocker := obj.Annotations["airunway.ai/dynamo-test-backend"] == "mocker" &&
304+
spec.Provider != nil && spec.Provider.Name == "dynamo"
305+
306+
// The Dynamo mocker backend only simulates the vLLM engine. Enforce the
307+
// vLLM-only constraint at admission so a non-vllm engine + mocker annotation
308+
// is rejected here rather than admitted and failing later during provider
309+
// reconciliation (the dynamo provider re-validates this too). An empty engine
310+
// type is allowed — the provider defaults it to vllm.
311+
if isDynamoMocker && spec.Engine.Type != "" && spec.Engine.Type != airunwayv1alpha1.EngineTypeVLLM {
312+
allErrs = append(allErrs, field.Invalid(
313+
specPath.Child("engine", "type"),
314+
spec.Engine.Type,
315+
"the dynamo mocker test backend only supports the vllm engine",
316+
))
317+
}
318+
319+
if !isDynamoMocker && spec.Provider != nil && spec.Provider.Name != "" && spec.Engine.Type != "" && v.Reader != nil {
295320
var providerConfig airunwayv1alpha1.InferenceProviderConfig
296321
err := v.Reader.Get(ctx, client.ObjectKey{Name: spec.Provider.Name}, &providerConfig)
297322
if apierrors.IsNotFound(err) && v.APIReader != nil {
@@ -383,7 +408,11 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
383408
specPath.Child("scaling", "prefill"),
384409
"disaggregated mode requires scaling.prefill",
385410
))
386-
} else {
411+
} else if !isDynamoMocker {
412+
// Mocker mode runs the GPU-less python3 -m dynamo.mocker backend,
413+
// so a CPU-only disaggregated mocker deployment legitimately omits
414+
// scaling.prefill.gpu.count. The prefill block itself is still
415+
// required (above) so the dynamo transformer can build the worker.
387416
if spec.Scaling.Prefill.GPU == nil || spec.Scaling.Prefill.GPU.Count == 0 {
388417
allErrs = append(allErrs, field.Required(
389418
specPath.Child("scaling", "prefill", "gpu", "count"),
@@ -397,7 +426,9 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
397426
specPath.Child("scaling", "decode"),
398427
"disaggregated mode requires scaling.decode",
399428
))
400-
} else {
429+
} else if !isDynamoMocker {
430+
// See the prefill note above: mocker mode waives the GPU-count
431+
// requirement while still requiring the decode block.
401432
if spec.Scaling.Decode.GPU == nil || spec.Scaling.Decode.GPU.Count == 0 {
402433
allErrs = append(allErrs, field.Required(
403434
specPath.Child("scaling", "decode", "gpu", "count"),

0 commit comments

Comments
 (0)