Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ linters:
alias: egv1a1
- pkg: github.com/envoyproxy/ai-gateway/api/v1alpha1
alias: aigv1a1
- pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2
alias: gwaiev1a2
- pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1
alias: gwaiev1
- pkg: k8s.io/apimachinery/pkg/apis/meta/v1
alias: metav1
- pkg: k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1
Expand Down
4 changes: 2 additions & 2 deletions api/v1alpha1/ai_gateway_route.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ type AIGatewayRouteRule struct {
// It can reference either an AIServiceBackend or an InferencePool resource.
//
// +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together"
// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported"
// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.k8s.io group is supported"
type AIGatewayRouteRuleBackendRef struct {
// Name is the name of the backend resource.
// When Group and Kind are not specified, this refers to an AIServiceBackend.
Expand All @@ -278,7 +278,7 @@ type AIGatewayRouteRuleBackendRef struct {

// Group is the group of the backend resource.
// When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
// Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
// Currently, only "inference.networking.k8s.io" is supported for InferencePool resources.
//
// +optional
// +kubebuilder:validation:MaxLength=253
Expand Down
2 changes: 1 addition & 1 deletion api/v1alpha1/ai_gateway_route_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const (
defaultRequestTimeout gwapiv1.Duration = "60s"

// inferencePoolGroup is the API group for InferencePool resources.
inferencePoolGroup = "inference.networking.x-k8s.io"
inferencePoolGroup = "inference.networking.k8s.io"
// inferencePoolKind is the kind for InferencePool resources.
inferencePoolKind = "InferencePool"
)
Expand Down
2 changes: 1 addition & 1 deletion cmd/aigw/envoy-gateway-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ extensionApis:
enableBackend: true
extensionManager:
backendResources:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
version: v1
hooks:
Expand Down
22 changes: 11 additions & 11 deletions docs/proposals/003-epp-integration-proposal/proposal.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ When request goes to envoyproxy, it goes to the http filter chain, the ext-proc
The gRPC service info is pre-defined in [InferencePool](https://gateway-api-inference-extension.sigs.k8s.io/api-types/inferencepool/) extensionRef, giving an example below:

```
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -81,7 +81,7 @@ spec:
name: inference-gateway
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
matches:
Expand Down Expand Up @@ -209,7 +209,7 @@ This requires to expand the `AIGatewayRouteRuleBackendRef` with `BackendObjectRe
- When it matches vllm-llama3-8b-instruct goes to InferencePool `vllm-llama3-8b-instruct`

```
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -249,7 +249,7 @@ spec:
value: vllm-llama3-8b-instruct
backendRefs:
- name: vllm-llama3-8b-instruct
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
kind: InferencePool
```

Expand All @@ -269,7 +269,7 @@ This approach is preferred because InferencePool resources do not require Backen
- When it matches vllm-llama3-8b-instruct goes to AIServiceBackend `vllm-llama3-8b-instruct`

```yaml
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -319,7 +319,7 @@ spec:
name: OpenAI
backendRef:
name: vllm-llama3-8b-instruct
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
kind: InferencePool
```

Expand Down Expand Up @@ -384,7 +384,7 @@ It adds the the cluster with override_host loadBalancingPolicy, we can add the h
Take the configuration below as an example:

```yaml
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -417,7 +417,7 @@ spec:
value: vllm-llama3-8b-instruct
backendRefs:
- name: vllm-llama3-8b-instruct
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
kind: InferencePool
```

Expand Down Expand Up @@ -582,7 +582,7 @@ spec:
name: x-ai-eg-model
value: meta-llama/Llama-3.1-8B-Instruct
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
- matches:
Expand All @@ -591,7 +591,7 @@ spec:
name: x-ai-eg-model
value: mistral:latest
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: mistral
- matches:
Expand Down Expand Up @@ -619,7 +619,7 @@ spec:
namespace: default
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
Expand Down
4 changes: 2 additions & 2 deletions examples/inference-pool/aigwroute.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
name: Authorization
value: sk-zyxwvutsrqponmlkjihgfedcba
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
- matches:
Expand All @@ -58,7 +58,7 @@ spec:
name: x-ai-eg-model
value: mistral:latest
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: mistral
- matches:
Expand Down
16 changes: 8 additions & 8 deletions examples/inference-pool/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
initialDelaySeconds: 1
periodSeconds: 1
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: mistral
Expand All @@ -61,16 +61,16 @@ spec:
extensionRef:
name: mistral-epp
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceObjective
metadata:
name: mistral
namespace: default
spec:
modelName: mistral:latest
criticality: Critical
poolRef:
# Bind the InferenceModel to the InferencePool.
# Bind the InferenceObjective to the InferencePool.
name: mistral
---
apiVersion: v1
Expand Down Expand Up @@ -158,7 +158,7 @@ metadata:
namespace: default
data:
default-plugins.yaml: |
apiVersion: inference.networking.x-k8s.io/v1alpha1
apiVersion: inference.networking.x-k8s.io/v1alpha1
kind: EndpointPickerConfig
plugins:
- type: low-queue-filter
Expand Down Expand Up @@ -208,7 +208,7 @@ data:
- pluginRef: low-latency-filter
- pluginRef: random-picker
plugins-v2.yaml: |
apiVersion: inference.networking.x-k8s.io/v1alpha1
apiVersion: inference.networking.x-k8s.io/v1alpha1
kind: EndpointPickerConfig
plugins:
- type: queue-scorer
Expand Down Expand Up @@ -238,10 +238,10 @@ apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: pod-read
rules:
- apiGroups: ["inference.networking.x-k8s.io"]
- apiGroups: ["inference.networking.k8s.io"]
resources: ["inferencepools"]
verbs: ["get", "watch", "list"]
- apiGroups: ["inference.networking.x-k8s.io"]
- apiGroups: ["inference.networking.k8s.io"]
resources: ["inferenceobjectives"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
Expand Down
2 changes: 1 addition & 1 deletion examples/inference-pool/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ data:
enableBackend: true
extensionManager:
backendResources:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
version: v1
hooks:
Expand Down
2 changes: 1 addition & 1 deletion examples/inference-pool/httproute.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ spec:
namespace: default
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ require (
k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d
sigs.k8s.io/controller-runtime v0.22.1
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c
sigs.k8s.io/gateway-api-inference-extension v0.5.1
sigs.k8s.io/gateway-api-inference-extension v1.0.0
sigs.k8s.io/yaml v1.6.0
)

Expand Down
15 changes: 9 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJ
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
Expand Down Expand Up @@ -344,10 +347,10 @@ github.com/ohler55/ojg v1.26.10/go.mod h1:/Y5dGWkekv9ocnUixuETqiL58f+5pAsUfg5P8e
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=
github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
github.com/onsi/ginkgo/v2 v2.24.0 h1:obZz8LAnHicNdbBqvG3ytAFx8fgza+i1IDpBVcHT2YE=
github.com/onsi/ginkgo/v2 v2.24.0/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk=
github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY=
github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o=
github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
github.com/openai/openai-go/v2 v2.7.0 h1:/8MSFCXcasin7AyuWQ2au6FraXL71gzAs+VfbMv+J3k=
Expand Down Expand Up @@ -661,8 +664,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV
sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c h1:GS4VnGRV90GEUjrgQ2GT5ii6yzWj3KtgUg+sVMdhs5c=
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
sigs.k8s.io/gateway-api-inference-extension v0.5.1 h1:OMpt4gKlPWkD+h5kHcZZVh4926kix2DSBPI7X5ntuCA=
sigs.k8s.io/gateway-api-inference-extension v0.5.1/go.mod h1:lki0jx1qysZSZT4Ai2BxuAcpx6G8g5oBgOGuuJzjy/k=
sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8=
sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/kubectl-validate v0.0.5-0.20250915070809-d2f2d68fba09 h1:JQbPOwLjSztom+aSDQIi6UZq8V0Gbv7BjAlYQSgycCI=
Expand Down
4 changes: 2 additions & 2 deletions internal/controller/ai_gateway_route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
Group: ptr.To("inference.networking.x-k8s.io"),
Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
Weight: ptr.To(int32(100)),
},
Expand All @@ -460,7 +460,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) {

// Check the first rule (our InferencePool rule).
backendRef := httpRoute.Spec.Rules[0].BackendRefs[0]
require.Equal(t, "inference.networking.x-k8s.io", string(*backendRef.Group))
require.Equal(t, "inference.networking.k8s.io", string(*backendRef.Group))
require.Equal(t, "InferencePool", string(*backendRef.Kind))
require.Equal(t, "test-inference-pool", string(backendRef.Name))
require.Equal(t, "test-ns", string(*backendRef.Namespace))
Expand Down
8 changes: 4 additions & 4 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
gwapiv1a3 "sigs.k8s.io/gateway-api/apis/v1alpha3"
gwapiv1b1 "sigs.k8s.io/gateway-api/apis/v1beta1"
Expand All @@ -50,7 +50,7 @@ func init() {
utilruntime.Must(gwapiv1.Install(Scheme))
utilruntime.Must(gwapiv1a3.Install(Scheme))
utilruntime.Must(gwapiv1b1.Install(Scheme))
utilruntime.Must(gwaiev1a2.Install(Scheme))
utilruntime.Must(gwaiev1.Install(Scheme))
}

// Scheme contains the necessary schemes for the AI Gateway.
Expand Down Expand Up @@ -161,7 +161,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con
if err != nil {
return fmt.Errorf("failed to create CRD client for inference extension: %w", err)
}
const inferencePoolCRD = "inferencepools.inference.networking.x-k8s.io"
const inferencePoolCRD = "inferencepools.inference.networking.k8s.io"
if _, crdErr := crdClient.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, inferencePoolCRD, metav1.GetOptions{}); crdErr != nil {
if apierrors.IsNotFound(crdErr) {
logger.Info("InferencePool CRD not found, skipping InferencePool controller. " +
Expand All @@ -173,7 +173,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con
// CRD exists, create the controller.
inferencePoolC := NewInferencePoolController(c, kubernetes.NewForConfigOrDie(config), logger.
WithName("inference-pool"))
if err = TypedControllerBuilderForCRD(mgr, &gwaiev1a2.InferencePool{}).
if err = TypedControllerBuilderForCRD(mgr, &gwaiev1.InferencePool{}).
Watches(&gwapiv1.Gateway{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.gatewayEventHandler)).
Watches(&aigv1a1.AIGatewayRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.aiGatewayRouteEventHandler)).
Watches(&gwapiv1.HTTPRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.httpRouteEventHandler)).
Expand Down
Loading
Loading