surajssd
diff --git a/‎.github/workflows/e2e-dynamo-mocker.yml‎
Lines changed: 92 additions & 0 deletions b/‎.github/workflows/e2e-dynamo-mocker.yml‎
Lines changed: 92 additions & 0 deletions
diff --git a/‎controller/internal/controller/gateway_reconciler.go‎
Lines changed: 32 additions & 0 deletions b/‎controller/internal/controller/gateway_reconciler.go‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎controller/internal/controller/gateway_reconciler_test.go‎
Lines changed: 36 additions & 0 deletions b/‎controller/internal/controller/gateway_reconciler_test.go‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎controller/internal/controller/modeldeployment_controller.go‎
Lines changed: 36 additions & 18 deletions b/‎controller/internal/controller/modeldeployment_controller.go‎
Lines changed: 36 additions & 18 deletions
diff --git a/‎controller/internal/controller/validate_spec_test.go‎
Lines changed: 67 additions & 0 deletions b/‎controller/internal/controller/validate_spec_test.go‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎controller/internal/webhook/v1alpha1/modeldeployment_webhook.go‎
Lines changed: 34 additions & 3 deletions b/‎controller/internal/webhook/v1alpha1/modeldeployment_webhook.go‎
Lines changed: 34 additions & 3 deletions
@@ -0,0 +1,92 @@
+name: E2E Dynamo Mocker Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  e2e-dynamo-mocker:
+    runs-on: ubuntu-latest-16-cores
+    timeout-minutes: 45
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Setup Go
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
+        with:
+          go-version: "1.25"
+          cache-dependency-path: providers/dynamo/go.sum
+
+      - name: Setup Bun
+        # Required by the Makefile's verify-versions target (TS version-sync check),
+        # which setup-dynamo-mocker depends on via the root Makefile.
+        uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
+        with:
+          bun-version: latest # NOTE(review): unpinned; consider a fixed Bun version for reproducible CI
+
+      - name: Setup Kind
+        run: |
+          go install sigs.k8s.io/kind@latest # NOTE(review): unpinned; consider a fixed kind release for reproducible CI
+          kind create cluster --name airunway-e2e --wait 120s
+
+      - name: Install Dynamo platform (CPU/mocker)
+        run: |
+          # CPU-only install: no GPU pre-deployment check, no GAIE. The mocker
+          # backend runs python3 -m dynamo.mocker and needs no GPUs.
+          make -C providers/dynamo setup-dynamo-mocker
+          kubectl wait --for=condition=Available deployment -n dynamo-system --all --timeout=300s
+
+      - name: Build and deploy controller
+        run: |
+          make controller-docker-build CONTROLLER_IMG=airunway-controller:e2e
+          kind load docker-image airunway-controller:e2e --name airunway-e2e
+          make controller-deploy CONTROLLER_IMG=airunway-controller:e2e
+          kubectl wait --for=condition=Available deployment -n airunway-system -l control-plane=controller-manager --timeout=120s
+
+      - name: Build and deploy Dynamo provider
+        run: |
+          make -C providers/dynamo docker-build IMG=dynamo-provider:e2e
+          kind load docker-image dynamo-provider:e2e --name airunway-e2e
+          make -C providers/dynamo deploy IMG=dynamo-provider:e2e
+          kubectl wait --for=condition=Available deployment -n airunway-system -l control-plane=dynamo-provider --timeout=120s
+
+      - name: Wait for provider registration
+        run: |
+          kubectl wait --for=jsonpath='{.status.ready}'=true inferenceproviderconfig/dynamo --timeout=120s
+
+      - name: Run mocker E2E (aggregated + disaggregated)
+        run: |
+          make -C providers/dynamo test-e2e-mocker
+
+      - name: Collect debug info
+        if: failure() # diagnostics below are best-effort so one failing command cannot hide the rest
+        run: |
+          echo "=== ModelDeployments ==="
+          kubectl get modeldeployments -A -o yaml || true
+          echo "=== DynamoGraphDeployments ==="
+          kubectl get dynamographdeployments.nvidia.com -A -o yaml || true
+          echo "=== InferenceProviderConfigs ==="
+          kubectl get inferenceproviderconfigs -o yaml || true
+          echo "=== Controller Logs ==="
+          kubectl logs -n airunway-system -l control-plane=controller-manager --tail=200 || true
+          echo "=== Dynamo Provider Logs ==="
+          kubectl logs -n airunway-system -l control-plane=dynamo-provider --tail=200 || true
+          echo "=== Dynamo Operator Logs ===" # kubectl logs needs a pod or selector, so iterate over the namespace's pods
+          kubectl get pods -n dynamo-system -o name | xargs -r -n1 kubectl logs -n dynamo-system --all-containers --tail=200 --prefix || true
+          echo "=== Events ==="
+          kubectl get events -A --sort-by=.lastTimestamp || true
+          echo "=== Pods ==="
+          kubectl get pods -A || true
+
+      - name: Cleanup
+        if: always()
+        run: |
+          kind delete cluster --name airunway-e2e
@@ -56,6 +56,18 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
 		return nil
 	}
 
+	// Skip for the Dynamo mocker test backend. Mocker mode deploys a standalone
+	// Frontend and intentionally does not create a provider-managed
+	// InferencePool/EPP, so engaging gateway reconciliation here would wait
+	// forever on a pool that never appears (NotFound retries / a misleading
+	// GatewayReady=False status) on any cluster that does have the GAIE CRDs
+	// installed. This keeps the controller consistent with the dynamo
+	// transformer, which also forces the non-gateway path in mocker mode.
+	if isDynamoMockerMode(md) {
+		logger.V(1).Info("Skipping gateway reconciliation for Dynamo mocker test backend", "name", md.Name)
+		return nil
+	}
+
 	// Skip if gateway CRDs are not available
 	if !r.GatewayDetector.IsAvailable(ctx) {
 		// Warn if user explicitly enabled gateway but CRDs are missing
@@ -851,6 +863,26 @@ func resolvedProviderName(md *airunwayv1alpha1.ModelDeployment) string {
 	return ""
 }
 
+// dynamoMockerAnnotation / dynamoMockerValue are the annotation key and value
+// that select the Dynamo provider's internal, test-only mocker backend. They
+// are literals here (not imported from providers/dynamo) so the controller has
+// no build dependency on an out-of-tree provider module; keep them in sync with
+// providers/dynamo/mocker.go (AnnotationDynamoTestBackend / DynamoTestBackendMocker).
+const (
+	dynamoMockerAnnotation = "airunway.ai/dynamo-test-backend"
+	dynamoMockerValue      = "mocker"
+)
+
+// isDynamoMockerMode reports whether md explicitly targets the "dynamo"
+// provider and carries the mocker test-backend annotation. Callers use it to
+// skip GPU-oriented validation and gateway reconciliation for such deployments.
+func isDynamoMockerMode(md *airunwayv1alpha1.ModelDeployment) bool {
+	if p := md.Spec.Provider; p == nil || p.Name != "dynamo" {
+		return false
+	}
+	return md.Annotations[dynamoMockerAnnotation] == dynamoMockerValue
+}
+
 // resolveServicePort looks up the first HTTP port on the named service.
 func (r *ModelDeploymentReconciler) resolveServicePort(ctx context.Context, serviceName, namespace string) int32 {
 	var svc corev1.Service
 
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"testing"
 
+	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
@@ -268,6 +269,41 @@ func TestGateway_HTTPRouteCreation(t *testing.T) {
 	}
 }
 
+func TestGateway_DynamoMockerSkipsCreation(t *testing.T) {
+	scheme := newTestScheme()
+	deployment := newModelDeployment("test-model", "default")
+	// Gateway stays at its default (enabled) and the GAIE CRDs are reported as
+	// available, so the mocker annotation is the only thing that can keep
+	// reconcileGateway off the gateway path for this deployment.
+	deployment.Spec.Provider = &airunwayv1alpha1.ProviderSpec{Name: "dynamo"}
+	deployment.Annotations = map[string]string{"airunway.ai/dynamo-test-backend": "mocker"}
+	reconciler := newTestReconciler(scheme, fakeDetector(true, "my-gateway", "gateway-ns"), deployment)
+	ctx := context.Background()
+	key := types.NamespacedName{Name: "test-model", Namespace: "default"}
+
+	if err := reconciler.reconcileGateway(ctx, deployment); err != nil {
+		t.Fatalf("reconcileGateway failed: %v", err)
+	}
+
+	// A successful lookup would mean an InferencePool was created.
+	var pool inferencev1.InferencePool
+	if getErr := reconciler.Get(ctx, key, &pool); getErr == nil {
+		t.Error("expected InferencePool to NOT be created in dynamo mocker mode")
+	}
+
+	// Likewise, a successful lookup would mean an HTTPRoute was created.
+	var route gatewayv1.HTTPRoute
+	if getErr := reconciler.Get(ctx, key, &route); getErr == nil {
+		t.Error("expected HTTPRoute to NOT be created in dynamo mocker mode")
+	}
+
+	// The status must carry no GatewayReady condition at all (true or false).
+	cond := meta.FindStatusCondition(deployment.Status.Conditions, airunwayv1alpha1.ConditionTypeGatewayReady)
+	if cond != nil {
+		t.Errorf("expected no GatewayReady condition in mocker mode, got %q/%q", cond.Status, cond.Reason)
+	}
+}
+
 func TestGateway_DisabledSkipsCreation(t *testing.T) {
 	scheme := newTestScheme()
 	md := newModelDeployment("test-model", "default")
 
@@ -368,6 +368,17 @@ func (r *ModelDeploymentReconciler) validateSpec(ctx context.Context, md *airunw
 		return fmt.Errorf("engine.type must be specified or auto-selected from provider capabilities")
 	}
 
+	// Mocker mode escape hatch: a ModelDeployment annotated with
+	// airunway.ai/dynamo-test-backend=mocker targeting the dynamo provider runs
+	// the GPU-less python3 -m dynamo.mocker backend, so the GPU compatibility and
+	// disaggregated gpu.count checks below must not reject it. This mirrors the
+	// admission webhook (see modeldeployment_webhook.go) so the two cannot drift.
+	// Mocker is vLLM-only.
+	isDynamoMocker := isDynamoMockerMode(md)
+	if isDynamoMocker && engineType != airunwayv1alpha1.EngineTypeVLLM {
+		return fmt.Errorf("the dynamo mocker test backend only supports the vllm engine")
+	}
+
 	// Validate provider/engine/serving-mode/GPU-CPU compatibility via the
 	// shared helper so the webhook and reconciler cannot drift.
 	gpuCount := int32(0)
@@ -385,17 +396,19 @@ func (r *ModelDeploymentReconciler) validateSpec(ctx context.Context, md *airunw
 			}
 		}
 	}
-	if ces := validation.CheckProviderCompatibility(
-		providerName,
-		namedConfig,
-		providerConfigs,
-		engineType,
-		servingMode,
-		gpuCount,
-	); len(ces) > 0 {
-		// Return the first error to preserve the reconciler's existing
-		// single-error contract.
-		return fmt.Errorf("%s", ces[0].Message)
+	if !isDynamoMocker {
+		if ces := validation.CheckProviderCompatibility(
+			providerName,
+			namedConfig,
+			providerConfigs,
+			engineType,
+			servingMode,
+			gpuCount,
+		); len(ces) > 0 {
+			// Return the first error to preserve the reconciler's existing
+			// single-error contract.
+			return fmt.Errorf("%s", ces[0].Message)
+		}
 	}
 
 	// Validate disaggregated mode configuration
@@ -410,14 +423,19 @@ func (r *ModelDeploymentReconciler) validateSpec(ctx context.Context, md *airunw
 			return fmt.Errorf("disaggregated mode requires scaling.prefill and scaling.decode")
 		}
 
-		// Prefill must have GPU
-		if spec.Scaling.Prefill.GPU == nil || spec.Scaling.Prefill.GPU.Count == 0 {
-			return fmt.Errorf("disaggregated mode requires scaling.prefill.gpu.count > 0")
-		}
+		// The GPU-less mocker backend waives the per-component gpu.count
+		// requirement, but the prefill/decode blocks themselves are still
+		// required (above) so the dynamo transformer can build both workers.
+		if !isDynamoMocker {
+			// Prefill must have GPU
+			if spec.Scaling.Prefill.GPU == nil || spec.Scaling.Prefill.GPU.Count == 0 {
+				return fmt.Errorf("disaggregated mode requires scaling.prefill.gpu.count > 0")
+			}
 
-		// Decode must have GPU
-		if spec.Scaling.Decode.GPU == nil || spec.Scaling.Decode.GPU.Count == 0 {
-			return fmt.Errorf("disaggregated mode requires scaling.decode.gpu.count > 0")
+			// Decode must have GPU
+			if spec.Scaling.Decode.GPU == nil || spec.Scaling.Decode.GPU.Count == 0 {
+				return fmt.Errorf("disaggregated mode requires scaling.decode.gpu.count > 0")
+			}
 		}
 	}
 
 
@@ -126,6 +126,73 @@ func TestValidateSpec(t *testing.T) {
 			},
 			providerConfigs: allProviders(),
 		},
+		{
+			name: "valid: CPU-only aggregated vllm on dynamo with mocker annotation",
+			md: airunwayv1alpha1.ModelDeployment{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{"airunway.ai/dynamo-test-backend": "mocker"},
+				},
+				Spec: airunwayv1alpha1.ModelDeploymentSpec{
+					Model:    airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
+					Engine:   airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeVLLM},
+					Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
+					// No resources.gpu: the GPU-less mocker backend waives it.
+				},
+			},
+			providerConfigs: allProviders(),
+		},
+		{
+			name: "valid: CPU-only disaggregated vllm on dynamo with mocker annotation",
+			md: airunwayv1alpha1.ModelDeployment{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{"airunway.ai/dynamo-test-backend": "mocker"},
+				},
+				Spec: airunwayv1alpha1.ModelDeploymentSpec{
+					Model:    airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
+					Engine:   airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeVLLM},
+					Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
+					Serving:  &airunwayv1alpha1.ServingSpec{Mode: airunwayv1alpha1.ServingModeDisaggregated},
+					// prefill/decode blocks present but no gpu.count.
+					Scaling: &airunwayv1alpha1.ScalingSpec{
+						Prefill: &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
+						Decode:  &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
+					},
+				},
+			},
+			providerConfigs: allProviders(),
+		},
+		{
+			name: "invalid: non-vllm engine on dynamo even with mocker annotation",
+			md: airunwayv1alpha1.ModelDeployment{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{"airunway.ai/dynamo-test-backend": "mocker"},
+				},
+				Spec: airunwayv1alpha1.ModelDeploymentSpec{
+					Model:    airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
+					Engine:   airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeSGLang},
+					Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
+				},
+			},
+			providerConfigs: allProviders(),
+			wantErr:         "only supports the vllm engine",
+		},
+		{
+			name: "invalid: CPU-only disaggregated vllm on dynamo WITHOUT mocker annotation",
+			md: airunwayv1alpha1.ModelDeployment{
+				Spec: airunwayv1alpha1.ModelDeploymentSpec{
+					Model:    airunwayv1alpha1.ModelSpec{ID: "Qwen/Qwen3-0.6B", Source: airunwayv1alpha1.ModelSourceHuggingFace},
+					Engine:   airunwayv1alpha1.EngineSpec{Type: airunwayv1alpha1.EngineTypeVLLM},
+					Provider: &airunwayv1alpha1.ProviderSpec{Name: "dynamo"},
+					Serving:  &airunwayv1alpha1.ServingSpec{Mode: airunwayv1alpha1.ServingModeDisaggregated},
+					Scaling: &airunwayv1alpha1.ScalingSpec{
+						Prefill: &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
+						Decode:  &airunwayv1alpha1.ComponentScalingSpec{Replicas: 1},
+					},
+				},
+			},
+			providerConfigs: allProviders(),
+			wantErr:         "scaling.prefill.gpu.count > 0",
+		},
 		{
 			name: "valid: llamacpp CPU-only on kaito",
 			md: airunwayv1alpha1.ModelDeployment{
 
@@ -291,7 +291,32 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
 	// admission; falls back to the uncached APIReader only when the cache
 	// reports NotFound, to absorb the race where a brand-new
 	// InferenceProviderConfig hasn't yet propagated to informers.
-	if spec.Provider != nil && spec.Provider.Name != "" && spec.Engine.Type != "" && v.Reader != nil {
+	//
+	// Mocker mode escape hatch: a ModelDeployment annotated with
+	// airunway.ai/dynamo-test-backend=mocker targeting the dynamo provider runs
+	// the GPU-less python3 -m dynamo.mocker backend, so the provider's GPU
+	// capability check must not reject it at admission. This is a test-only path
+	// (the dynamo provider re-validates compatibility during reconciliation).
+	// The annotation key is kept as a literal here to avoid importing the
+	// provider module from the controller webhook (see
+	// providers/dynamo/mocker.go AnnotationDynamoTestBackend / DynamoTestBackendMocker).
+	isDynamoMocker := obj.Annotations["airunway.ai/dynamo-test-backend"] == "mocker" &&
+		spec.Provider != nil && spec.Provider.Name == "dynamo"
+
+	// The Dynamo mocker backend only simulates the vLLM engine. Enforce the
+	// vLLM-only constraint at admission so a non-vllm engine + mocker annotation
+	// is rejected here rather than admitted and failing later during provider
+	// reconciliation (the dynamo provider re-validates this too). An empty engine
+	// type is allowed — the provider defaults it to vllm.
+	if isDynamoMocker && spec.Engine.Type != "" && spec.Engine.Type != airunwayv1alpha1.EngineTypeVLLM {
+		allErrs = append(allErrs, field.Invalid(
+			specPath.Child("engine", "type"),
+			spec.Engine.Type,
+			"the dynamo mocker test backend only supports the vllm engine",
+		))
+	}
+
+	if !isDynamoMocker && spec.Provider != nil && spec.Provider.Name != "" && spec.Engine.Type != "" && v.Reader != nil {
 		var providerConfig airunwayv1alpha1.InferenceProviderConfig
 		err := v.Reader.Get(ctx, client.ObjectKey{Name: spec.Provider.Name}, &providerConfig)
 		if apierrors.IsNotFound(err) && v.APIReader != nil {
@@ -383,7 +408,11 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
 					specPath.Child("scaling", "prefill"),
 					"disaggregated mode requires scaling.prefill",
 				))
-			} else {
+			} else if !isDynamoMocker {
+				// Mocker mode runs the GPU-less python3 -m dynamo.mocker backend,
+				// so a CPU-only disaggregated mocker deployment legitimately omits
+				// scaling.prefill.gpu.count. The prefill block itself is still
+				// required (above) so the dynamo transformer can build the worker.
 				if spec.Scaling.Prefill.GPU == nil || spec.Scaling.Prefill.GPU.Count == 0 {
 					allErrs = append(allErrs, field.Required(
 						specPath.Child("scaling", "prefill", "gpu", "count"),
@@ -397,7 +426,9 @@ func (v *ModelDeploymentCustomValidator) validateSpec(ctx context.Context, obj *
 					specPath.Child("scaling", "decode"),
 					"disaggregated mode requires scaling.decode",
 				))
-			} else {
+			} else if !isDynamoMocker {
+				// See the prefill note above: mocker mode waives the GPU-count
+				// requirement while still requiring the decode block.
 				if spec.Scaling.Decode.GPU == nil || spec.Scaling.Decode.GPU.Count == 0 {
 					allErrs = append(allErrs, field.Required(
 						specPath.Child("scaling", "decode", "gpu", "count"),