Skip to content

Commit 1f6961d

Browse files
authored
feat: add support for endpoint picker (#823)
**Description** This PR adds support for InferencePool, which allows Envoy AI Gateway to integrate with any endpoint picker that supports InferencePool. By integrating with an endpoint picker (EPP) such as the Gateway API Inference Extension (GIE) EPP or a non-GIE EPP, Envoy AI Gateway gains access to advanced scheduling algorithms that optimize inference. **Related Issues/PRs (if applicable)** Fixes: #423 Fixes #604 Fixes: #648 Some follow-up: #911 --------- Signed-off-by: bitliu <[email protected]>
1 parent 42965a1 commit 1f6961d

40 files changed

+3906
-401
lines changed

api/v1alpha1/ai_gateway_route.go

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,23 @@ type AIGatewayRouteSpec struct {
214214
}
215215

216216
// AIGatewayRouteRule is a rule that defines the routing behavior of the AIGatewayRoute.
217+
//
218+
// +kubebuilder:validation:XValidation:rule="!has(self.backendRefs) || size(self.backendRefs) == 0 || (self.backendRefs.all(ref, !has(ref.group) && !has(ref.kind)) || self.backendRefs.all(ref, has(ref.group) && has(ref.kind)))", message="cannot mix InferencePool and AIServiceBackend references in the same rule"
219+
// +kubebuilder:validation:XValidation:rule="!has(self.backendRefs) || size(self.backendRefs) == 0 || !self.backendRefs.exists(ref, has(ref.group) && has(ref.kind)) || size(self.backendRefs) == 1", message="only one InferencePool backend is allowed per rule"
217220
type AIGatewayRouteRule struct {
218-
// BackendRefs is the list of AIServiceBackend that this rule will route the traffic to.
221+
// BackendRefs is the list of backends that this rule will route the traffic to.
219222
// Each backend can have a weight that determines the traffic distribution.
220223
//
221224
// The namespace of each backend is "local", i.e. the same namespace as the AIGatewayRoute.
222225
//
223-
// By configuring multiple backends, you can achieve the fallback behavior in the case of
224-
// the primary backend is not available combined with the BackendTrafficPolicy of Envoy Gateway.
226+
// BackendRefs can reference either AIServiceBackend resources (default) or InferencePool resources
227+
// from the Gateway API Inference Extension. When referencing InferencePool resources:
228+
// - Only one InferencePool backend is allowed per rule
229+
// - Cannot mix InferencePool with AIServiceBackend references in the same rule
230+
// - Fallback behavior is handled by the InferencePool's endpoint picker
231+
//
232+
// For AIServiceBackend references, you can achieve fallback behavior by configuring multiple backends
233+
// combined with the BackendTrafficPolicy of Envoy Gateway.
225234
// Please refer to https://gateway.envoyproxy.io/docs/tasks/traffic/failover/ as well as
226235
// https://gateway.envoyproxy.io/docs/tasks/traffic/retry/.
227236
//
@@ -277,17 +286,42 @@ type AIGatewayRouteRule struct {
277286
}
278287

279288
// AIGatewayRouteRuleBackendRef is a reference to a backend with a weight.
289+
// It can reference either an AIServiceBackend or an InferencePool resource.
290+
//
291+
// +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together"
292+
// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported"
280293
type AIGatewayRouteRuleBackendRef struct {
281-
// Name is the name of the AIServiceBackend.
294+
// Name is the name of the backend resource.
295+
// When Group and Kind are not specified, this refers to an AIServiceBackend.
296+
// When Group and Kind are specified, this refers to the resource of the specified type.
282297
//
283298
// +kubebuilder:validation:Required
284299
// +kubebuilder:validation:MinLength=1
285300
Name string `json:"name"`
286301

302+
// Group is the group of the backend resource.
303+
// When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
304+
// Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
305+
//
306+
// +optional
307+
// +kubebuilder:validation:MaxLength=253
308+
// +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
309+
Group *string `json:"group,omitempty"`
310+
311+
// Kind is the kind of the backend resource.
312+
// When not specified, defaults to AIServiceBackend.
313+
// Currently, only "InferencePool" is supported when Group is specified.
314+
//
315+
// +optional
316+
// +kubebuilder:validation:MaxLength=63
317+
// +kubebuilder:validation:Pattern=`^$|^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
318+
Kind *string `json:"kind,omitempty"`
319+
287320
// Name of the model in the backend. If provided this will override the name provided in the request.
321+
// This field is ignored when referencing InferencePool resources.
288322
ModelNameOverride string `json:"modelNameOverride,omitempty"`
289323

290-
// Weight is the weight of the AIServiceBackend. This is exactly the same as the weight in
324+
// Weight is the weight of the backend. This is exactly the same as the weight in
291325
// the BackendRef in the Gateway API. See for the details:
292326
// https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef
293327
//
@@ -297,9 +331,10 @@ type AIGatewayRouteRuleBackendRef struct {
297331
// +kubebuilder:validation:Minimum=0
298332
// +kubebuilder:default=1
299333
Weight *int32 `json:"weight,omitempty"`
300-
// Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints.
334+
// Priority is the priority of the backend. This sets the priority on the underlying endpoints.
301335
// See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority
302336
// Note: This will override the `fallback` property of the underlying Envoy Gateway Backend.
337+
// This field is ignored when referencing InferencePool resources.
303338
//
304339
// Default is 0.
305340
//

api/v1alpha1/ai_gateway_route_helper.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,19 @@ const (
1313
// defaultRequestTimeout is the default timeout for HTTP requests when not specified.
1414
// Changed from Envoy Gateway's default of 15s to 60s for AI workloads.
1515
defaultRequestTimeout gwapiv1.Duration = "60s"
16+
17+
// inferencePoolGroup is the API group for InferencePool resources.
18+
inferencePoolGroup = "inference.networking.x-k8s.io"
19+
// inferencePoolKind is the kind for InferencePool resources.
20+
inferencePoolKind = "InferencePool"
1621
)
1722

1823
// GetTimeoutsOrDefault returns the timeouts with default values applied when not specified.
1924
// This ensures that AI Gateway routes have appropriate timeout defaults for AI workloads.
2025
func (r *AIGatewayRouteRule) GetTimeoutsOrDefault() *gwapiv1.HTTPRouteTimeouts {
2126
defaultTimeout := defaultRequestTimeout
2227

23-
if r.Timeouts == nil {
28+
if r == nil || r.Timeouts == nil {
2429
// If no timeouts are specified, use default request timeout.
2530
return &gwapiv1.HTTPRouteTimeouts{
2631
Request: &defaultTimeout,
@@ -37,3 +42,43 @@ func (r *AIGatewayRouteRule) GetTimeoutsOrDefault() *gwapiv1.HTTPRouteTimeouts {
3742
// Return as-is if request timeout is already specified.
3843
return r.Timeouts
3944
}
45+
46+
// IsInferencePool returns true if the backend reference points to an InferencePool resource.
47+
func (ref *AIGatewayRouteRuleBackendRef) IsInferencePool() bool {
48+
if ref == nil {
49+
return false
50+
}
51+
return ref.Group != nil && ref.Kind != nil &&
52+
*ref.Group == inferencePoolGroup && *ref.Kind == inferencePoolKind
53+
}
54+
55+
// IsAIServiceBackend returns true if the backend reference is not an InferencePool reference (including a nil reference), in which case it is treated as an AIServiceBackend.
56+
func (ref *AIGatewayRouteRuleBackendRef) IsAIServiceBackend() bool {
57+
return !ref.IsInferencePool()
58+
}
59+
60+
// HasInferencePoolBackends returns true if the rule contains any InferencePool backend references.
61+
func (r *AIGatewayRouteRule) HasInferencePoolBackends() bool {
62+
if r == nil {
63+
return false
64+
}
65+
for _, ref := range r.BackendRefs {
66+
if ref.IsInferencePool() {
67+
return true
68+
}
69+
}
70+
return false
71+
}
72+
73+
// HasAIServiceBackends returns true if the rule contains any AIServiceBackend references.
74+
func (r *AIGatewayRouteRule) HasAIServiceBackends() bool {
75+
if r == nil {
76+
return false
77+
}
78+
for _, ref := range r.BackendRefs {
79+
if ref.IsAIServiceBackend() {
80+
return true
81+
}
82+
}
83+
return false
84+
}

api/v1alpha1/ai_gateway_route_helper_test.go

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,190 @@ func TestAIGatewayRouteRule_GetTimeoutsWithDefaults(t *testing.T) {
7373
})
7474
}
7575
}
76+
77+
func TestAIGatewayRouteRuleBackendRef_IsInferencePool(t *testing.T) {
78+
tests := []struct {
79+
name string
80+
ref *AIGatewayRouteRuleBackendRef
81+
expected bool
82+
}{
83+
{
84+
name: "AIServiceBackend reference (no group/kind)",
85+
ref: &AIGatewayRouteRuleBackendRef{
86+
Name: "test-backend",
87+
},
88+
expected: false,
89+
},
90+
{
91+
name: "InferencePool reference",
92+
ref: &AIGatewayRouteRuleBackendRef{
93+
Name: "test-pool",
94+
Group: ptr.To(inferencePoolGroup),
95+
Kind: ptr.To(inferencePoolKind),
96+
},
97+
expected: true,
98+
},
99+
{
100+
name: "Other resource reference",
101+
ref: &AIGatewayRouteRuleBackendRef{
102+
Name: "test-other",
103+
Group: ptr.To("other.group"),
104+
Kind: ptr.To("OtherKind"),
105+
},
106+
expected: false,
107+
},
108+
{
109+
name: "Partial reference (only group)",
110+
ref: &AIGatewayRouteRuleBackendRef{
111+
Name: "test-partial",
112+
Group: ptr.To(inferencePoolGroup),
113+
},
114+
expected: false,
115+
},
116+
}
117+
118+
for _, tt := range tests {
119+
t.Run(tt.name, func(t *testing.T) {
120+
result := tt.ref.IsInferencePool()
121+
require.Equal(t, tt.expected, result)
122+
})
123+
}
124+
}
125+
126+
func TestAIGatewayRouteRuleBackendRef_IsAIServiceBackend(t *testing.T) {
127+
tests := []struct {
128+
name string
129+
ref *AIGatewayRouteRuleBackendRef
130+
expected bool
131+
}{
132+
{
133+
name: "AIServiceBackend reference (no group/kind)",
134+
ref: &AIGatewayRouteRuleBackendRef{
135+
Name: "test-backend",
136+
},
137+
expected: true,
138+
},
139+
{
140+
name: "InferencePool reference",
141+
ref: &AIGatewayRouteRuleBackendRef{
142+
Name: "test-pool",
143+
Group: ptr.To(inferencePoolGroup),
144+
Kind: ptr.To(inferencePoolKind),
145+
},
146+
expected: false,
147+
},
148+
}
149+
150+
for _, tt := range tests {
151+
t.Run(tt.name, func(t *testing.T) {
152+
result := tt.ref.IsAIServiceBackend()
153+
require.Equal(t, tt.expected, result)
154+
})
155+
}
156+
}
157+
158+
func TestAIGatewayRouteRule_HasInferencePoolBackends(t *testing.T) {
159+
tests := []struct {
160+
name string
161+
rule *AIGatewayRouteRule
162+
expected bool
163+
}{
164+
{
165+
name: "No backends",
166+
rule: &AIGatewayRouteRule{
167+
BackendRefs: []AIGatewayRouteRuleBackendRef{},
168+
},
169+
expected: false,
170+
},
171+
{
172+
name: "Only AIServiceBackend references",
173+
rule: &AIGatewayRouteRule{
174+
BackendRefs: []AIGatewayRouteRuleBackendRef{
175+
{Name: "backend1"},
176+
{Name: "backend2"},
177+
},
178+
},
179+
expected: false,
180+
},
181+
{
182+
name: "Only InferencePool reference",
183+
rule: &AIGatewayRouteRule{
184+
BackendRefs: []AIGatewayRouteRuleBackendRef{
185+
{
186+
Name: "pool1",
187+
Group: ptr.To(inferencePoolGroup),
188+
Kind: ptr.To(inferencePoolKind),
189+
},
190+
},
191+
},
192+
expected: true,
193+
},
194+
{
195+
name: "Mixed references (should not happen due to validation)",
196+
rule: &AIGatewayRouteRule{
197+
BackendRefs: []AIGatewayRouteRuleBackendRef{
198+
{Name: "backend1"},
199+
{
200+
Name: "pool1",
201+
Group: ptr.To(inferencePoolGroup),
202+
Kind: ptr.To(inferencePoolKind),
203+
},
204+
},
205+
},
206+
expected: true,
207+
},
208+
}
209+
210+
for _, tt := range tests {
211+
t.Run(tt.name, func(t *testing.T) {
212+
result := tt.rule.HasInferencePoolBackends()
213+
require.Equal(t, tt.expected, result)
214+
})
215+
}
216+
}
217+
218+
func TestAIGatewayRouteRule_HasAIServiceBackends(t *testing.T) {
219+
tests := []struct {
220+
name string
221+
rule *AIGatewayRouteRule
222+
expected bool
223+
}{
224+
{
225+
name: "No backends",
226+
rule: &AIGatewayRouteRule{
227+
BackendRefs: []AIGatewayRouteRuleBackendRef{},
228+
},
229+
expected: false,
230+
},
231+
{
232+
name: "Only AIServiceBackend references",
233+
rule: &AIGatewayRouteRule{
234+
BackendRefs: []AIGatewayRouteRuleBackendRef{
235+
{Name: "backend1"},
236+
{Name: "backend2"},
237+
},
238+
},
239+
expected: true,
240+
},
241+
{
242+
name: "Only InferencePool reference",
243+
rule: &AIGatewayRouteRule{
244+
BackendRefs: []AIGatewayRouteRuleBackendRef{
245+
{
246+
Name: "pool1",
247+
Group: ptr.To(inferencePoolGroup),
248+
Kind: ptr.To(inferencePoolKind),
249+
},
250+
},
251+
},
252+
expected: false,
253+
},
254+
}
255+
256+
for _, tt := range tests {
257+
t.Run(tt.name, func(t *testing.T) {
258+
result := tt.rule.HasAIServiceBackends()
259+
require.Equal(t, tt.expected, result)
260+
})
261+
}
262+
}

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/aigw/envoy-gateway-config.yaml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,25 @@ logging:
2323
extensionApis:
2424
enableBackend: true
2525
extensionManager:
26+
backendResources:
27+
- group: inference.networking.x-k8s.io
28+
kind: InferencePool
29+
version: v1alpha2
2630
hooks:
2731
xdsTranslator:
32+
translation:
33+
listener:
34+
includeAll: true
35+
route:
36+
includeAll: true
37+
cluster:
38+
includeAll: true
39+
secret:
40+
includeAll: true
2841
post:
29-
- VirtualHost
3042
- Translation
43+
- Cluster
44+
- Route
3145
service:
3246
fqdn:
3347
hostname: localhost

0 commit comments

Comments
 (0)