Skip to content

Commit f5fd48d

Browse files
feat: Define provider specific gateway capabilities for llm-d (#288)
Signed-off-by: Eric Bishop <ericbish.dev@gmail.com> Co-authored-by: Robbie Cronin <robert.owen.cronin@gmail.com>
1 parent 9a923f9 commit f5fd48d

20 files changed

Lines changed: 530 additions & 27 deletions

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ GAIE_VERSION_RE := $(subst .,\.,$(GAIE_VERSION))
281281
DYNAMO_VERSION_RE := $(subst .,\.,$(DYNAMO_VERSION))
282282
KAITO_VERSION_RE := $(subst .,\.,$(KAITO_VERSION))
283283
VLLM_VERSION_RE := $(subst .,\.,$(VLLM_VERSION))
284+
LLMD_VERSION_RE := $(subst .,\.,$(LLMD_VERSION))
284285

285286
verify-versions:
286287
@# 1. controller/go.mod must pin GAIE_VERSION
@@ -301,7 +302,10 @@ verify-versions:
301302
@# 6. providers/vllm/transformer.go fallback literal must match VLLM_VERSION
302303
@grep -qE '^var VLLMVersion = "$(VLLM_VERSION_RE)"$$' providers/vllm/transformer.go || \
303304
{ echo "❌ providers/vllm/transformer.go VLLMVersion fallback != $(VLLM_VERSION) (from versions.env)"; exit 1; }
304-
@# 7. generated TS must be in sync with versions.env.
305+
@# 7. providers/llmd/config.go fallback literal must match LLMD_VERSION
306+
@grep -qE '^var LLMDSchedulerImage = "ghcr\.io/llm-d/llm-d-inference-scheduler:v$(LLMD_VERSION_RE)"$$' providers/llmd/config.go || \
307+
{ echo "❌ providers/llmd/config.go LLMDSchedulerImage tag != $(LLMD_VERSION) (from versions.env)"; exit 1; }
308+
@# 8. generated TS must be in sync with versions.env.
305309
@# Generate to a temp file and diff against the working-tree copy so
306310
@# that synced uncommitted edits pass (the local-dev case) while
307311
@# stale committed files still fail (the CI case — CI's working

controller/api/v1alpha1/inferenceproviderconfig_types.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,27 @@ type ProviderCapabilities struct {
9393
}
9494

9595
// GatewayCapabilities defines gateway-related capabilities for a specific engine.
96+
//
97+
// There are two independent extension points:
98+
//
99+
// 1. Full InferencePool + EPP delegation. When ManagesInferencePool is true,
100+
// the controller assumes the provider's upstream operator creates both the
101+
// InferencePool and the Endpoint Picker (EPP) downstream (e.g. NVIDIA Dynamo
102+
// creates them from a DynamoGraphDeployment). The controller waits for the
103+
// named pool, reads its EndpointPickerRef, and wires HTTPRoute/ReferenceGrant
104+
// accordingly. The controller does not create an InferencePool or EPP itself.
105+
//
106+
// 2. Endpoint Picker customization. When EndpointPicker is set, the controller
107+
// still creates the default InferencePool and manages the EPP and its scaffolding
108+
// (ServiceAccount, Role, RoleBinding, ConfigMap, Deployment, Service), but
109+
// substitutes the provider-supplied EPP image and plugin config. This lets a
110+
// provider ship its own scheduler (e.g. the llm-d Endpoint Picker with its
111+
// own scoring plugins) without re-implementing the surrounding RBAC and
112+
// plumbing.
113+
//
114+
// The two extension points can be specified independently, but
115+
// ManagesInferencePool takes precedence: when it is true, EndpointPicker is
116+
// ignored (the provider is then expected to manage the EPP itself).
96117
type GatewayCapabilities struct {
97118
// managesInferencePool indicates that the provider's operator creates and
98119
// owns the GAIE InferencePool (and EPP) for ModelDeployments using this
@@ -117,6 +138,13 @@ type GatewayCapabilities struct {
117138
// +optional
118139
InferencePoolNamespace string `json:"inferencePoolNamespace,omitempty"`
119140

141+
// endpointPicker, when set, customizes the EPP image and plugin
142+
// configuration that the controller deploys alongside the default
143+
// InferencePool. Ignored when ManagesInferencePool is true (the provider
144+
// is then expected to manage the EPP itself).
145+
// +optional
146+
EndpointPicker *EndpointPickerCapabilities `json:"endpointPicker,omitempty"`
147+
120148
// ignoresServedName indicates that gateway routing for this provider+engine
121149
// pair does not honor spec.model.servedName, so the controller should fall
122150
// back to auto-discovery / spec.model.id when computing the route model
@@ -126,6 +154,24 @@ type GatewayCapabilities struct {
126154
IgnoresServedName bool `json:"ignoresServedName,omitempty"`
127155
}
128156

157+
// EndpointPickerCapabilities lets a provider override the EPP image and plugin
158+
// configuration used by the controller-managed Endpoint Picker. All other EPP
159+
// resources (ServiceAccount, Role, RoleBinding, ConfigMap, Deployment, Service)
160+
// are still created by the controller using the same shape as the default EPP.
161+
type EndpointPickerCapabilities struct {
162+
// image is the container image for the EPP. When empty, the controller
163+
// uses its built-in default GAIE EPP image.
164+
// +optional
165+
Image string `json:"image,omitempty"`
166+
167+
// configData is the raw YAML body of the EndpointPickerConfig that will be
168+
// written into the EPP ConfigMap under the key "default-plugins.yaml" and
169+
// mounted at /config/default-plugins.yaml. When empty, the controller's
170+
// default (empty) EndpointPickerConfig is used.
171+
// +optional
172+
ConfigData string `json:"configData,omitempty"`
173+
}
174+
129175
// HasEngine returns true if the provider supports the given engine type
130176
func (c *ProviderCapabilities) HasEngine(engine EngineType) bool {
131177
return c.GetEngineCapability(engine) != nil

controller/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 21 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controller/config/crd/bases/airunway.ai_inferenceproviderconfigs.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,26 @@ spec:
7171
description: gateway defines this engine's gateway-related
7272
capabilities.
7373
properties:
74+
endpointPicker:
75+
description: |-
76+
endpointPicker, when set, customizes the EPP image and plugin
77+
configuration that the controller deploys alongside the default
78+
InferencePool. Ignored when ManagesInferencePool is true (the provider
79+
is then expected to manage the EPP itself).
80+
properties:
81+
configData:
82+
description: |-
83+
configData is the raw YAML body of the EndpointPickerConfig that will be
84+
written into the EPP ConfigMap under the key "default-plugins.yaml" and
85+
mounted at /config/default-plugins.yaml. When empty, the controller's
86+
default (empty) EndpointPickerConfig is used.
87+
type: string
88+
image:
89+
description: |-
90+
image is the container image for the EPP. When empty, the controller
91+
uses its built-in default GAIE EPP image.
92+
type: string
93+
type: object
7494
ignoresServedName:
7595
description: |-
7696
ignoresServedName indicates that gateway routing for this provider+engine

controller/internal/controller/gateway_reconciler.go

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,15 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
117117
// Determine the HTTPRoute backend via the GAIE InferencePool/EPP path.
118118
poolName, poolNamespace := md.Name, md.Namespace
119119

120+
// Two independent extension points exist:
121+
// 1. InferencePool delegation (e.g. Dynamo): the provider's upstream
122+
// operator creates the InferencePool AND the EPP. The controller
123+
// skips both. Opt-in via gatewayCapabilities.ManagesInferencePool.
124+
// 2. EPP customization (e.g. llm-d): the controller creates the
125+
// InferencePool and the EPP scaffolding, but uses the provider-
126+
// supplied EPP image and plugin config. Opt-in via
127+
// gatewayCapabilities.EndpointPicker.
128+
120129
// Use the provider-managed inference pool if it exists,
121130
// otherwise use the default inference pool.
122131
if ok, err := r.providerInferencePoolExistsOrCreateDefault(ctx, md, gatewayCapabilities, gwConfig); ok && err == nil {
@@ -146,9 +155,12 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
146155

147156
if gatewayCapabilities != nil && gatewayCapabilities.ManagesInferencePool {
148157
logger.Info("Skipping EPP creation, provider manages EPP", "provider", resolvedProviderName(md))
149-
} else { // Use default EPP
150-
// Create or update EPP (EndPoint Picker) for the InferencePool
151-
if err := r.reconcileEPP(ctx, md); err != nil {
158+
} else { // Use controller-managed EPP (default or provider-customized).
159+
var eppOverrides *airunwayv1alpha1.EndpointPickerCapabilities
160+
if gatewayCapabilities != nil {
161+
eppOverrides = gatewayCapabilities.EndpointPicker
162+
}
163+
if err := r.reconcileEPP(ctx, md, eppOverrides); err != nil {
152164
r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionFalse, "EPPFailed", err.Error())
153165
return fmt.Errorf("reconciling EPP: %w", err)
154166
}
@@ -381,8 +393,11 @@ func resolveProviderPoolField(pattern, mdName, mdNamespace, fallback string) str
381393
}
382394

383395
// reconcileEPP creates or updates the Endpoint Picker Proxy deployment and service
384-
// for a ModelDeployment's InferencePool.
385-
func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunwayv1alpha1.ModelDeployment) error {
396+
// for a ModelDeployment's InferencePool. When overrides is non-nil, its Image
397+
// and ConfigData take precedence over the controller's defaults.
398+
func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunwayv1alpha1.ModelDeployment, overrides *airunwayv1alpha1.EndpointPickerCapabilities) error {
399+
logger := log.FromContext(ctx)
400+
386401
eppName := md.Name + "-epp"
387402
eppPort := r.GatewayDetector.EPPServicePort
388403
if eppPort == 0 {
@@ -392,6 +407,9 @@ func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunw
392407
if eppImage == "" {
393408
eppImage = "registry.k8s.io/gateway-api-inference-extension/epp:" + gateway.DefaultGAIEVersion
394409
}
410+
if overrides != nil && overrides.Image != "" {
411+
eppImage = overrides.Image
412+
}
395413

396414
labels := map[string]string{
397415
"app.kubernetes.io/name": eppName,
@@ -480,10 +498,15 @@ func (r *ModelDeploymentReconciler) reconcileEPP(ctx context.Context, md *airunw
480498
},
481499
}
482500
if _, err := ctrl.CreateOrUpdate(ctx, r.Client, cm, func() error {
483-
cm.Data = map[string]string{
484-
"default-plugins.yaml": `apiVersion: inference.networking.x-k8s.io/v1alpha1
501+
pluginsYAML := `apiVersion: inference.networking.x-k8s.io/v1alpha1
485502
kind: EndpointPickerConfig
486-
`,
503+
`
504+
if overrides != nil && overrides.ConfigData != "" {
505+
logger.V(1).Info("Using provider overrides for EPP plugins config")
506+
pluginsYAML = overrides.ConfigData
507+
}
508+
cm.Data = map[string]string{
509+
"default-plugins.yaml": pluginsYAML,
487510
}
488511
return ctrl.SetControllerReference(md, cm, r.Scheme)
489512
}); err != nil {
@@ -942,7 +965,8 @@ func (r *ModelDeploymentReconciler) labelModelPods(ctx context.Context, md *airu
942965

943966
// List pods matching the service selector
944967
var pods corev1.PodList
945-
if err := r.List(ctx, &pods,
968+
if err := r.List(
969+
ctx, &pods,
946970
client.InNamespace(md.Namespace),
947971
client.MatchingLabels(svc.Spec.Selector),
948972
); err != nil {
@@ -1120,6 +1144,9 @@ func (r *ModelDeploymentReconciler) cleanupGatewayResources(ctx context.Context,
11201144
if gatewayCapabilities, err = r.resolveProviderGatewayCapabilities(ctx, md); err != nil {
11211145
logger.V(1).Info("Could not resolve provider gateway capabilities, proceeding without provider-specific gateway capabilities", "error", err)
11221146
}
1147+
// Only true delegation (ManagesInferencePool: true) means the provider
1148+
// owns the pool + EPP. EndpointPicker-only customization still leaves the
1149+
// pool and EPP scaffolding owned by the controller, so they must be cleaned up here.
11231150
providerManagedPool := gatewayCapabilities != nil && gatewayCapabilities.ManagesInferencePool
11241151

11251152
eppName := md.Name + "-epp"
@@ -1209,6 +1236,10 @@ func (r *ModelDeploymentReconciler) cleanupGatewayResources(ctx context.Context,
12091236
func (r *ModelDeploymentReconciler) providerInferencePoolExistsOrCreateDefault(ctx context.Context, md *airunwayv1alpha1.ModelDeployment, gatewayCapabilitities *airunwayv1alpha1.GatewayCapabilities, gwConfig *gateway.GatewayConfig) (bool, error) {
12101237
logger := log.FromContext(ctx)
12111238

1239+
// Only treat the pool as provider-managed when the provider has explicitly
1240+
// opted in via ManagesInferencePool. Providers that only customize the EPP
1241+
// (gatewayCapabilities.EndpointPicker without ManagesInferencePool) still
1242+
// rely on the controller to create the default InferencePool.
12121243
if gatewayCapabilitities != nil && gatewayCapabilitities.ManagesInferencePool {
12131244
// Provider manages the pool.
12141245
return true, nil

0 commit comments

Comments
 (0)