envoyproxy
diff --git a/api/v1alpha1/ai_gateway_route.go b/api/v1alpha1/ai_gateway_route.go
Lines changed: 41 additions & 6 deletions
diff --git a/api/v1alpha1/ai_gateway_route_helper.go b/api/v1alpha1/ai_gateway_route_helper.go
Lines changed: 46 additions & 1 deletion
diff --git a/api/v1alpha1/ai_gateway_route_helper_test.go b/api/v1alpha1/ai_gateway_route_helper_test.go
Lines changed: 187 additions & 0 deletions
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
Lines changed: 10 additions & 0 deletions
diff --git a/cmd/aigw/envoy-gateway-config.yaml b/cmd/aigw/envoy-gateway-config.yaml
Lines changed: 15 additions & 1 deletion
@@ -214,14 +214,23 @@ type AIGatewayRouteSpec struct {
 }
 
 // AIGatewayRouteRule is a rule that defines the routing behavior of the AIGatewayRoute.
+//
+// +kubebuilder:validation:XValidation:rule="!has(self.backendRefs) || size(self.backendRefs) == 0 || (self.backendRefs.all(ref, !has(ref.group) && !has(ref.kind)) || self.backendRefs.all(ref, has(ref.group) && has(ref.kind)))", message="cannot mix InferencePool and AIServiceBackend references in the same rule"
+// +kubebuilder:validation:XValidation:rule="!has(self.backendRefs) || size(self.backendRefs) == 0 || !self.backendRefs.exists(ref, has(ref.group) && has(ref.kind)) || size(self.backendRefs) == 1", message="only one InferencePool backend is allowed per rule"
 type AIGatewayRouteRule struct {
-	// BackendRefs is the list of AIServiceBackend that this rule will route the traffic to.
+	// BackendRefs is the list of backends that this rule will route the traffic to.
 	// Each backend can have a weight that determines the traffic distribution.
 	//
 	// The namespace of each backend is "local", i.e. the same namespace as the AIGatewayRoute.
 	//
-	// By configuring multiple backends, you can achieve the fallback behavior in the case of
-	// the primary backend is not available combined with the BackendTrafficPolicy of Envoy Gateway.
+	// BackendRefs can reference either AIServiceBackend resources (default) or InferencePool resources
+	// from the Gateway API Inference Extension. When referencing InferencePool resources:
+	// - Only one InferencePool backend is allowed per rule
+	// - Cannot mix InferencePool with AIServiceBackend references in the same rule
+	// - Fallback behavior is handled by the InferencePool's endpoint picker
+	//
+	// For AIServiceBackend references, you can achieve fallback behavior by configuring multiple backends
+	// combined with the BackendTrafficPolicy of Envoy Gateway.
 	// Please refer to https://gateway.envoyproxy.io/docs/tasks/traffic/failover/ as well as
 	// https://gateway.envoyproxy.io/docs/tasks/traffic/retry/.
 	//
@@ -277,17 +286,42 @@ type AIGatewayRouteRule struct {
 }
 
 // AIGatewayRouteRuleBackendRef is a reference to a backend with a weight.
+// It can reference either an AIServiceBackend or an InferencePool resource.
+//
+// +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together"
+// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported"
 type AIGatewayRouteRuleBackendRef struct {
-	// Name is the name of the AIServiceBackend.
+	// Name is the name of the backend resource.
+	// When Group and Kind are not specified, this refers to an AIServiceBackend.
+	// When Group and Kind are specified, this refers to the resource of the specified type.
 	//
 	// +kubebuilder:validation:Required
 	// +kubebuilder:validation:MinLength=1
 	Name string `json:"name"`
 
+	// Group is the group of the backend resource.
+	// When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
+	// Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
+	//
+	// +optional
+	// +kubebuilder:validation:MaxLength=253
+	// +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
+	Group *string `json:"group,omitempty"`
+
+	// Kind is the kind of the backend resource.
+	// When not specified, defaults to AIServiceBackend.
+	// Currently, only "InferencePool" is supported when Group is specified.
+	//
+	// +optional
+	// +kubebuilder:validation:MaxLength=63
+	// +kubebuilder:validation:Pattern=`^$|^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
+	Kind *string `json:"kind,omitempty"`
+
 	// Name of the model in the backend. If provided this will override the name provided in the request.
+	// This field is ignored when referencing InferencePool resources.
 	ModelNameOverride string `json:"modelNameOverride,omitempty"`
 
-	// Weight is the weight of the AIServiceBackend. This is exactly the same as the weight in
+	// Weight is the weight of the backend. This is exactly the same as the weight in
 	// the BackendRef in the Gateway API. For details, see:
 	// https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef
 	//
@@ -297,9 +331,10 @@ type AIGatewayRouteRuleBackendRef struct {
 	// +kubebuilder:validation:Minimum=0
 	// +kubebuilder:default=1
 	Weight *int32 `json:"weight,omitempty"`
-	// Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints.
+	// Priority is the priority of the backend. This sets the priority on the underlying endpoints.
 	// See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority
 	// Note: This will override the `fallback` property of the underlying Envoy Gateway Backend.
+	// This field is ignored when referencing InferencePool resources.
 	//
 	// Default is 0.
 	//
 
@@ -13,14 +13,19 @@ const (
 	// defaultRequestTimeout is the default timeout for HTTP requests when not specified.
 	// Changed from Envoy Gateway's default of 15s to 60s for AI workloads.
 	defaultRequestTimeout gwapiv1.Duration = "60s"
+
+	// inferencePoolGroup is the API group for InferencePool resources.
+	inferencePoolGroup = "inference.networking.x-k8s.io"
+	// inferencePoolKind is the kind for InferencePool resources.
+	inferencePoolKind = "InferencePool"
 )
 
 // GetTimeoutsOrDefault returns the timeouts with default values applied when not specified.
 // This ensures that AI Gateway routes have appropriate timeout defaults for AI workloads.
 func (r *AIGatewayRouteRule) GetTimeoutsOrDefault() *gwapiv1.HTTPRouteTimeouts {
 	defaultTimeout := defaultRequestTimeout
 
-	if r.Timeouts == nil {
+	if r == nil || r.Timeouts == nil {
 		// If no timeouts are specified, use default request timeout.
 		return &gwapiv1.HTTPRouteTimeouts{
 			Request: &defaultTimeout,
@@ -37,3 +42,43 @@ func (r *AIGatewayRouteRule) GetTimeoutsOrDefault() *gwapiv1.HTTPRouteTimeouts {
 	// Return as-is if request timeout is already specified.
 	return r.Timeouts
 }
+
+// IsInferencePool returns true if the backend reference points to an InferencePool resource.
+func (ref *AIGatewayRouteRuleBackendRef) IsInferencePool() bool {
+	if ref == nil {
+		return false
+	}
+	return ref.Group != nil && ref.Kind != nil &&
+		*ref.Group == inferencePoolGroup && *ref.Kind == inferencePoolKind
+}
+
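+// IsAIServiceBackend returns true if the backend reference does not point to an InferencePool
+// resource, in which case it is treated as an AIServiceBackend reference. This includes a nil reference.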
+func (ref *AIGatewayRouteRuleBackendRef) IsAIServiceBackend() bool {
+	return !ref.IsInferencePool()
+}
+
+// HasInferencePoolBackends returns true if the rule contains any InferencePool backend references.
+func (r *AIGatewayRouteRule) HasInferencePoolBackends() bool {
+	if r == nil {
+		return false
+	}
+	for _, ref := range r.BackendRefs {
+		if ref.IsInferencePool() {
+			return true
+		}
+	}
+	return false
+}
+
+// HasAIServiceBackends returns true if the rule contains any AIServiceBackend references.
+func (r *AIGatewayRouteRule) HasAIServiceBackends() bool {
+	if r == nil {
+		return false
+	}
+	for _, ref := range r.BackendRefs {
+		if ref.IsAIServiceBackend() {
+			return true
+		}
+	}
+	return false
+}
@@ -73,3 +73,190 @@ func TestAIGatewayRouteRule_GetTimeoutsWithDefaults(t *testing.T) {
 		})
 	}
 }
+
+func TestAIGatewayRouteRuleBackendRef_IsInferencePool(t *testing.T) {
+	tests := []struct {
+		name     string
+		ref      *AIGatewayRouteRuleBackendRef
+		expected bool
+	}{
+		{
+			name: "AIServiceBackend reference (no group/kind)",
+			ref: &AIGatewayRouteRuleBackendRef{
+				Name: "test-backend",
+			},
+			expected: false,
+		},
+		{
+			name: "InferencePool reference",
+			ref: &AIGatewayRouteRuleBackendRef{
+				Name:  "test-pool",
+				Group: ptr.To(inferencePoolGroup),
+				Kind:  ptr.To(inferencePoolKind),
+			},
+			expected: true,
+		},
+		{
+			name: "Other resource reference",
+			ref: &AIGatewayRouteRuleBackendRef{
+				Name:  "test-other",
+				Group: ptr.To("other.group"),
+				Kind:  ptr.To("OtherKind"),
+			},
+			expected: false,
+		},
+		{
+			name: "Partial reference (only group)",
+			ref: &AIGatewayRouteRuleBackendRef{
+				Name:  "test-partial",
+				Group: ptr.To(inferencePoolGroup),
+			},
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := tt.ref.IsInferencePool()
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestAIGatewayRouteRuleBackendRef_IsAIServiceBackend(t *testing.T) {
+	tests := []struct {
+		name     string
+		ref      *AIGatewayRouteRuleBackendRef
+		expected bool
+	}{
+		{
+			name: "AIServiceBackend reference (no group/kind)",
+			ref: &AIGatewayRouteRuleBackendRef{
+				Name: "test-backend",
+			},
+			expected: true,
+		},
+		{
+			name: "InferencePool reference",
+			ref: &AIGatewayRouteRuleBackendRef{
+				Name:  "test-pool",
+				Group: ptr.To(inferencePoolGroup),
+				Kind:  ptr.To(inferencePoolKind),
+			},
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := tt.ref.IsAIServiceBackend()
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestAIGatewayRouteRule_HasInferencePoolBackends(t *testing.T) {
+	tests := []struct {
+		name     string
+		rule     *AIGatewayRouteRule
+		expected bool
+	}{
+		{
+			name: "No backends",
+			rule: &AIGatewayRouteRule{
+				BackendRefs: []AIGatewayRouteRuleBackendRef{},
+			},
+			expected: false,
+		},
+		{
+			name: "Only AIServiceBackend references",
+			rule: &AIGatewayRouteRule{
+				BackendRefs: []AIGatewayRouteRuleBackendRef{
+					{Name: "backend1"},
+					{Name: "backend2"},
+				},
+			},
+			expected: false,
+		},
+		{
+			name: "Only InferencePool reference",
+			rule: &AIGatewayRouteRule{
+				BackendRefs: []AIGatewayRouteRuleBackendRef{
+					{
+						Name:  "pool1",
+						Group: ptr.To(inferencePoolGroup),
+						Kind:  ptr.To(inferencePoolKind),
+					},
+				},
+			},
+			expected: true,
+		},
+		{
+			name: "Mixed references (should not happen due to validation)",
+			rule: &AIGatewayRouteRule{
+				BackendRefs: []AIGatewayRouteRuleBackendRef{
+					{Name: "backend1"},
+					{
+						Name:  "pool1",
+						Group: ptr.To(inferencePoolGroup),
+						Kind:  ptr.To(inferencePoolKind),
+					},
+				},
+			},
+			expected: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := tt.rule.HasInferencePoolBackends()
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestAIGatewayRouteRule_HasAIServiceBackends(t *testing.T) {
+	tests := []struct {
+		name     string
+		rule     *AIGatewayRouteRule
+		expected bool
+	}{
+		{
+			name: "No backends",
+			rule: &AIGatewayRouteRule{
+				BackendRefs: []AIGatewayRouteRuleBackendRef{},
+			},
+			expected: false,
+		},
+		{
+			name: "Only AIServiceBackend references",
+			rule: &AIGatewayRouteRule{
+				BackendRefs: []AIGatewayRouteRuleBackendRef{
+					{Name: "backend1"},
+					{Name: "backend2"},
+				},
+			},
+			expected: true,
+		},
+		{
+			name: "Only InferencePool reference",
+			rule: &AIGatewayRouteRule{
+				BackendRefs: []AIGatewayRouteRuleBackendRef{
+					{
+						Name:  "pool1",
+						Group: ptr.To(inferencePoolGroup),
+						Kind:  ptr.To(inferencePoolKind),
+					},
+				},
+			},
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := tt.rule.HasAIServiceBackends()
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}
@@ -23,11 +23,25 @@ logging:
 extensionApis:
   enableBackend: true
 extensionManager:
+  backendResources:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      version: v1alpha2
   hooks:
     xdsTranslator:
+      translation:
+        listener:
+          includeAll: true
+        route:
+          includeAll: true
+        cluster:
+          includeAll: true
+        secret:
+          includeAll: true
       post:
-        - VirtualHost
         - Translation
+        - Cluster
+        - Route
   service:
     fqdn:
       hostname: localhost