kaito-project
diff --git a/Makefile b/Makefile
Lines changed: 5 additions & 1 deletion
diff --git a/controller/api/v1alpha1/inferenceproviderconfig_types.go b/controller/api/v1alpha1/inferenceproviderconfig_types.go
Lines changed: 46 additions & 0 deletions
diff --git a/controller/api/v1alpha1/zz_generated.deepcopy.go b/controller/api/v1alpha1/zz_generated.deepcopy.go
Lines changed: 21 additions & 1 deletion
diff --git a/controller/config/crd/bases/airunway.ai_inferenceproviderconfigs.yaml b/controller/config/crd/bases/airunway.ai_inferenceproviderconfigs.yaml
Lines changed: 20 additions & 0 deletions
diff --git a/controller/internal/controller/gateway_reconciler.go b/controller/internal/controller/gateway_reconciler.go
Lines changed: 40 additions & 9 deletions
@@ -281,6 +281,7 @@ GAIE_VERSION_RE := $(subst .,\.,$(GAIE_VERSION))
 DYNAMO_VERSION_RE := $(subst .,\.,$(DYNAMO_VERSION))
 KAITO_VERSION_RE := $(subst .,\.,$(KAITO_VERSION))
 VLLM_VERSION_RE := $(subst .,\.,$(VLLM_VERSION))
+LLMD_VERSION_RE := $(subst .,\.,$(LLMD_VERSION))
 
 verify-versions:
 	@# 1. controller/go.mod must pin GAIE_VERSION
@@ -301,7 +302,10 @@ verify-versions:
 	@# 6. providers/vllm/transformer.go fallback literal must match VLLM_VERSION
 	@grep -qE '^var VLLMVersion = "$(VLLM_VERSION_RE)"$$' providers/vllm/transformer.go || \
 	  { echo "❌ providers/vllm/transformer.go VLLMVersion fallback != $(VLLM_VERSION) (from versions.env)"; exit 1; }
-	@# 7. generated TS must be in sync with versions.env.
+	@# 7. providers/llmd/config.go fallback literal must match LLMD_VERSION
+	@grep -qE '^var LLMDSchedulerImage = "ghcr\.io/llm-d/llm-d-inference-scheduler:v$(LLMD_VERSION_RE)"$$' providers/llmd/config.go || \
+	  { echo "❌ providers/llmd/config.go LLMDSchedulerImage tag != $(LLMD_VERSION) (from versions.env)"; exit 1; }
+	@# 8. generated TS must be in sync with versions.env.
 	@#    Generate to a temp file and diff against the working-tree copy so
 	@#    that synced uncommitted edits pass (the local-dev case) while
 	@#    stale committed files still fail (the CI case — CI's working
 
@@ -93,6 +93,27 @@ type ProviderCapabilities struct {
 }
 
 // GatewayCapabilities defines gateway-related capabilities for a specific engine.
+//
+// There are two independent extension points:
+//
+//  1. Full InferencePool + EPP delegation. When ManagesInferencePool is true,
+//     the controller assumes the provider's upstream operator creates both the
+//     InferencePool and the Endpoint Picker (EPP) downstream (e.g. NVIDIA Dynamo
+//     creates them from a DynamoGraphDeployment). The controller waits for the
+//     named pool, reads its EndpointPickerRef, and wires HTTPRoute/ReferenceGrant
+//     accordingly. The controller does not create an InferencePool or EPP itself.
+//
+//  2. Endpoint Picker customization. When EndpointPicker is set, the controller
+//     still creates the default InferencePool and manages the EPP and its scaffolding
+//     (ServiceAccount, Role, RoleBinding, ConfigMap, Deployment, Service), but
+//     substitutes the provider-supplied EPP image and plugin config. This lets a
+//     provider ship its own scheduler (e.g. the llm-d Endpoint Picker with its
+//     own scoring plugins) without re-implementing the surrounding RBAC and
+//     plumbing.
+//
+// The two extension points can be specified independently, but
+// ManagesInferencePool takes precedence: when it is true, EndpointPicker is
+// ignored (the provider is then expected to manage the EPP itself).
 type GatewayCapabilities struct {
 	// managesInferencePool indicates that the provider's operator creates and
 	// owns the GAIE InferencePool (and EPP) for ModelDeployments using this
@@ -117,6 +138,13 @@ type GatewayCapabilities struct {
 	// +optional
 	InferencePoolNamespace string `json:"inferencePoolNamespace,omitempty"`
 
+	// endpointPicker, when set, customizes the EPP image and plugin
+	// configuration that the controller deploys alongside the default
+	// InferencePool. Ignored when ManagesInferencePool is true (the provider
+	// is then expected to manage the EPP itself).
+	// +optional
+	EndpointPicker *EndpointPickerCapabilities `json:"endpointPicker,omitempty"`
+
 	// ignoresServedName indicates that gateway routing for this provider+engine
 	// pair does not honor spec.model.servedName, so the controller should fall
 	// back to auto-discovery / spec.model.id when computing the route model
@@ -126,6 +154,24 @@ type GatewayCapabilities struct {
 	IgnoresServedName bool `json:"ignoresServedName,omitempty"`
 }
 
+// EndpointPickerCapabilities lets a provider override the EPP image and plugin
+// configuration used by the controller-managed Endpoint Picker. The EPP resources
+// (ServiceAccount, Role, RoleBinding, ConfigMap, Deployment, Service) are still
+// created by the controller in the default shape; an empty field keeps its default.
+type EndpointPickerCapabilities struct {
+	// image is the container image for the EPP. When empty, the controller
+	// uses its built-in default GAIE EPP image.
+	// +optional
+	Image string `json:"image,omitempty"`
+
+	// configData is the raw YAML body of the EndpointPickerConfig that will be
+	// written into the EPP ConfigMap under the key "default-plugins.yaml" and
+	// mounted at /config/default-plugins.yaml. When empty, the controller's
+	// default (empty) EndpointPickerConfig is used.
+	// +optional
+	ConfigData string `json:"configData,omitempty"`
+}
+
 // HasEngine returns true if the provider supports the given engine type
 func (c *ProviderCapabilities) HasEngine(engine EngineType) bool {
 	return c.GetEngineCapability(engine) != nil
 
@@ -71,6 +71,26 @@ spec:
                           description: gateway defines this engine's gateway-related
                             capabilities.
                           properties:
+                            endpointPicker:
+                              description: |-
+                                endpointPicker, when set, customizes the EPP image and plugin
+                                configuration that the controller deploys alongside the default
+                                InferencePool. Ignored when ManagesInferencePool is true (the provider
+                                is then expected to manage the EPP itself).
+                              properties:
+                                configData:
+                                  description: |-
+                                    configData is the raw YAML body of the EndpointPickerConfig that will be
+                                    written into the EPP ConfigMap under the key "default-plugins.yaml" and
+                                    mounted at /config/default-plugins.yaml. When empty, the controller's
+                                    default (empty) EndpointPickerConfig is used.
+                                  type: string
+                                image:
+                                  description: |-
+                                    image is the container image for the EPP. When empty, the controller
+                                    uses its built-in default GAIE EPP image.
+                                  type: string
+                              type: object
                             ignoresServedName:
                               description: |-
                                 ignoresServedName indicates that gateway routing for this provider+engine
 
@@ -117,6 +117,15 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
 	// Determine the HTTPRoute backend via the GAIE InferencePool/EPP path.
 	poolName, poolNamespace := md.Name, md.Namespace
 
+	// Two independent extension points exist:
+	//   1. InferencePool delegation (e.g. Dynamo): the provider's upstream
+	//      operator creates the InferencePool AND the EPP. The controller
+	//      skips both. Opt-in via gatewayCapabilities.ManagesInferencePool.
+	//   2. EPP customization (e.g. llm-d): the controller creates the
+	//      InferencePool and the EPP scaffolding, but uses the provider-
+	//      supplied EPP image and plugin config. Opt-in via
+	//      gatewayCapabilities.EndpointPicker.
+
 	// Use provider managed inference pool if it exists,
 	// otherwise use the default inference pool.
 	if ok, err := r.providerInferencePoolExistsOrCreateDefault(ctx, md, gatewayCapabilities, gwConfig); ok && err == nil {
@@ -146,9 +155,12 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
 
 	if gatewayCapabilities != nil && gatewayCapabilities.ManagesInferencePool {
 		logger.Info("Skipping EPP creation, provider manages EPP", "provider", resolvedProviderName(md))
-	} else { // Use default EPP
-		// Create or update EPP (EndPoint Picker) for the InferencePool
-		if err := r.reconcileEPP(ctx, md); err != nil {
+	} else { // Use controller-managed EPP (default or provider-customized).
+		var eppOverrides *airunwayv1alpha1.EndpointPickerCapabilities
+		if gatewayCapabilities != nil {
+			eppOverrides = gatewayCapabilities.EndpointPicker
+		}
+		if err := r.reconcileEPP(ctx, md, eppOverrides); err != nil {
 			r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionFalse, "EPPFailed", err.Error())
 			return fmt.Errorf("reconciling EPP: %w", err)
 		}
@@ -381,8 +393,11 @@ func resolveProviderPoolField(pattern, mdName, mdNamespace, fallback string) str
 }
 
 // reconcileEPP creates or updates the Endpoint Picker Proxy deployment and service
-// for a ModelDeployment's InferencePool.
-func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunwayv1alpha1.ModelDeployment) error {
+// for a ModelDeployment's InferencePool. When overrides is non-nil, its non-empty
+// Image and ConfigData fields take precedence over the controller's defaults.
+func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunwayv1alpha1.ModelDeployment, overrides *airunwayv1alpha1.EndpointPickerCapabilities) error {
+	logger := log.FromContext(ctx)
+
 	eppName := md.Name + "-epp"
 	eppPort := r.GatewayDetector.EPPServicePort
 	if eppPort == 0 {
@@ -392,6 +407,9 @@ func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunw
 	if eppImage == "" {
 		eppImage = "registry.k8s.io/gateway-api-inference-extension/epp:" + gateway.DefaultGAIEVersion
 	}
+	if overrides != nil && overrides.Image != "" {
+		eppImage = overrides.Image
+	}
 
 	labels := map[string]string{
 		"app.kubernetes.io/name":       eppName,
@@ -480,10 +498,15 @@ func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunw
 		},
 	}
 	if _, err := ctrl.CreateOrUpdate(ctx, r.Client, cm, func() error {
-		cm.Data = map[string]string{
-			"default-plugins.yaml": `apiVersion: inference.networking.x-k8s.io/v1alpha1
+		pluginsYAML := `apiVersion: inference.networking.x-k8s.io/v1alpha1
 kind: EndpointPickerConfig
-`,
+`
+		if overrides != nil && overrides.ConfigData != "" {
+			logger.V(1).Info("Using provider overrides for EPP plugins config")
+			pluginsYAML = overrides.ConfigData
+		}
+		cm.Data = map[string]string{
+			"default-plugins.yaml": pluginsYAML,
 		}
 		return ctrl.SetControllerReference(md, cm, r.Scheme)
 	}); err != nil {
@@ -942,7 +965,8 @@ func (r *ModelDeploymentReconciler) labelModelPods(ctx context.Context, md *airu
 
 	// List pods matching the service selector
 	var pods corev1.PodList
-	if err := r.List(ctx, &pods,
+	if err := r.List(
+		ctx, &pods,
 		client.InNamespace(md.Namespace),
 		client.MatchingLabels(svc.Spec.Selector),
 	); err != nil {
@@ -1120,6 +1144,9 @@ func (r *ModelDeploymentReconciler) cleanupGatewayResources(ctx context.Context,
 	if gatewayCapabilities, err = r.resolveProviderGatewayCapabilities(ctx, md); err != nil {
 		logger.V(1).Info("Could not resolve provider gateway capabilities, proceeding without provider-specific gateway capabilities", "error", err)
 	}
+	// Only true delegation (ManagesInferencePool: true) means the provider
+	// owns the pool + EPP. EndpointPicker-only customization still leaves the
+	// pool and EPP scaffolding owned by the controller, so they must be cleaned up here.
 	providerManagedPool := gatewayCapabilities != nil && gatewayCapabilities.ManagesInferencePool
 
 	eppName := md.Name + "-epp"
@@ -1209,6 +1236,10 @@ func (r *ModelDeploymentReconciler) cleanupGatewayResources(ctx context.Context,
 func (r *ModelDeploymentReconciler) providerInferencePoolExistsOrCreateDefault(ctx context.Context, md *airunwayv1alpha1.ModelDeployment, gatewayCapabilitities *airunwayv1alpha1.GatewayCapabilities, gwConfig *gateway.GatewayConfig) (bool, error) {
 	logger := log.FromContext(ctx)
 
+	// Only treat the pool as provider-managed when the provider has explicitly
+	// opted in via ManagesInferencePool. Providers that only customize the EPP
+	// (gatewayCapabilities.EndpointPicker without ManagesInferencePool) still
+	// rely on the controller to create the default InferencePool.
 	if gatewayCapabilitities != nil && gatewayCapabilitities.ManagesInferencePool {
 		// Provider manages the pool.
 		return true, nil