diff --git a/.golangci.yml b/.golangci.yml
index 9f095e94e1..ba05b762db 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -44,8 +44,8 @@ linters:
alias: egv1a1
- pkg: github.com/envoyproxy/ai-gateway/api/v1alpha1
alias: aigv1a1
- - pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2
- alias: gwaiev1a2
+ - pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1
+ alias: gwaiev1
- pkg: k8s.io/apimachinery/pkg/apis/meta/v1
alias: metav1
- pkg: k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1
diff --git a/api/v1alpha1/ai_gateway_route.go b/api/v1alpha1/ai_gateway_route.go
index 3eba1315c2..bcc8c958f5 100644
--- a/api/v1alpha1/ai_gateway_route.go
+++ b/api/v1alpha1/ai_gateway_route.go
@@ -266,7 +266,7 @@ type AIGatewayRouteRule struct {
// It can reference either an AIServiceBackend or an InferencePool resource.
//
// +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together"
-// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported"
+// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.k8s.io group is supported"
type AIGatewayRouteRuleBackendRef struct {
// Name is the name of the backend resource.
// When Group and Kind are not specified, this refers to an AIServiceBackend.
@@ -278,7 +278,7 @@ type AIGatewayRouteRuleBackendRef struct {
// Group is the group of the backend resource.
// When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
- // Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
+ // Currently, only "inference.networking.k8s.io" is supported for InferencePool resources.
//
// +optional
// +kubebuilder:validation:MaxLength=253
diff --git a/api/v1alpha1/ai_gateway_route_helper.go b/api/v1alpha1/ai_gateway_route_helper.go
index a09891ab57..7779a2269f 100644
--- a/api/v1alpha1/ai_gateway_route_helper.go
+++ b/api/v1alpha1/ai_gateway_route_helper.go
@@ -15,7 +15,7 @@ const (
defaultRequestTimeout gwapiv1.Duration = "60s"
// inferencePoolGroup is the API group for InferencePool resources.
- inferencePoolGroup = "inference.networking.x-k8s.io"
+ inferencePoolGroup = "inference.networking.k8s.io"
// inferencePoolKind is the kind for InferencePool resources.
inferencePoolKind = "InferencePool"
)
diff --git a/cmd/aigw/envoy-gateway-config.yaml b/cmd/aigw/envoy-gateway-config.yaml
index 391bf17d24..ea0a22c4de 100644
--- a/cmd/aigw/envoy-gateway-config.yaml
+++ b/cmd/aigw/envoy-gateway-config.yaml
@@ -24,7 +24,7 @@ extensionApis:
enableBackend: true
extensionManager:
backendResources:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
- version: v1alpha2
+ version: v1
hooks:
diff --git a/docs/proposals/003-epp-integration-proposal/proposal.md b/docs/proposals/003-epp-integration-proposal/proposal.md
index 1cfca6f866..da8d0c5147 100644
--- a/docs/proposals/003-epp-integration-proposal/proposal.md
+++ b/docs/proposals/003-epp-integration-proposal/proposal.md
@@ -51,7 +51,7 @@ When request goes to envoyproxy, it goes to the http filter chain, the ext-proc
The gRPC service info is pre-defined in [InferencePool](https://gateway-api-inference-extension.sigs.k8s.io/api-types/inferencepool/) extensionRef, giving an example below:
```
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
@@ -81,7 +81,7 @@ spec:
name: inference-gateway
rules:
- backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
matches:
@@ -209,7 +209,7 @@ This requires to expand the `AIGatewayRouteRuleBackendRef` with `BackendObjectRe
- When it matches vllm-llama3-8b-instruct goes to InferencePool `vllm-llama3-8b-instruct`
```
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
@@ -249,7 +249,7 @@ spec:
value: vllm-llama3-8b-instruct
backendRefs:
- name: vllm-llama3-8b-instruct
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
kind: InferencePool
```
@@ -269,7 +269,7 @@ This approach is preferred because InferencePool resources do not require Backen
- When it matches vllm-llama3-8b-instruct goes to AIServiceBackend `vllm-llama3-8b-instruct`
```yaml
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
@@ -319,7 +319,7 @@ spec:
name: OpenAI
backendRef:
name: vllm-llama3-8b-instruct
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
kind: InferencePool
```
@@ -384,7 +384,7 @@ It adds the the cluster with override_host loadBalancingPolicy, we can add the h
Take the configuration below as an example:
```yaml
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
@@ -417,7 +417,7 @@ spec:
value: vllm-llama3-8b-instruct
backendRefs:
- name: vllm-llama3-8b-instruct
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
kind: InferencePool
```
@@ -582,7 +582,7 @@ spec:
name: x-ai-eg-model
value: meta-llama/Llama-3.1-8B-Instruct
backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
- matches:
@@ -591,7 +591,7 @@ spec:
name: x-ai-eg-model
value: mistral:latest
backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: mistral
- matches:
@@ -619,7 +619,7 @@ spec:
namespace: default
rules:
- backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
diff --git a/examples/inference-pool/aigwroute.yaml b/examples/inference-pool/aigwroute.yaml
index 88daa78d30..6e11dd6c7c 100644
--- a/examples/inference-pool/aigwroute.yaml
+++ b/examples/inference-pool/aigwroute.yaml
@@ -49,7 +49,7 @@ spec:
name: Authorization
value: sk-zyxwvutsrqponmlkjihgfedcba
backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
- matches:
@@ -58,7 +58,7 @@ spec:
name: x-ai-eg-model
value: mistral:latest
backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: mistral
- matches:
diff --git a/examples/inference-pool/base.yaml b/examples/inference-pool/base.yaml
index fde5878f15..a1979d3399 100644
--- a/examples/inference-pool/base.yaml
+++ b/examples/inference-pool/base.yaml
@@ -49,31 +49,40 @@ spec:
initialDelaySeconds: 1
periodSeconds: 1
---
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: mistral
namespace: default
spec:
- targetPortNumber: 8080
+ targetPorts:
+ - number: 8080
selector:
- app: mistral-upstream
- extensionRef:
+ matchLabels:
+ app: mistral-upstream
+ endpointPickerRef:
name: mistral-epp
+ port:
+ number: 9002
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
-kind: InferenceModel
+kind: InferenceObjective
metadata:
name: mistral
namespace: default
spec:
- modelName: mistral:latest
- criticality: Critical
+ priority: 10
poolRef:
- # Bind the InferenceModel to the InferencePool.
+ # Bind the InferenceObjective to the InferencePool.
name: mistral
---
apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: mistral-epp
+ namespace: default
+---
+apiVersion: v1
kind: Service
metadata:
name: mistral-epp
@@ -105,26 +114,27 @@ spec:
labels:
app: mistral-epp
spec:
+ serviceAccountName: mistral-epp
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
terminationGracePeriodSeconds: 130
containers:
- name: epp
- image: registry.k8s.io/gateway-api-inference-extension/epp:v0.5.1
+ image: registry.k8s.io/gateway-api-inference-extension/epp:v1.0.1
imagePullPolicy: IfNotPresent
args:
- - -poolName
+ - --pool-name
- "mistral"
- - "-poolNamespace"
+ - "--pool-namespace"
- "default"
- - -v
+ - --v
- "4"
- --zap-encoder
- "json"
- - -grpcPort
+ - --grpc-port
- "9002"
- - -grpcHealthPort
+ - --grpc-health-port
- "9003"
- - "-configFile"
+ - "--config-file"
- "/config/default-plugins.yaml"
ports:
- containerPort: 9002
@@ -158,95 +168,54 @@ metadata:
namespace: default
data:
default-plugins.yaml: |
- apiVersion: inference.networking.x-k8s.io/v1alpha1
- kind: EndpointPickerConfig
- plugins:
- - type: low-queue-filter
- parameters:
- threshold: 128
- - type: lora-affinity-filter
- parameters:
- threshold: 0.999
- - type: least-queue-filter
- - type: least-kv-cache-filter
- - type: decision-tree-filter
- name: low-latency-filter
- parameters:
- current:
- pluginRef: low-queue-filter
- nextOnSuccess:
- decisionTree:
- current:
- pluginRef: lora-affinity-filter
- nextOnSuccessOrFailure:
- decisionTree:
- current:
- pluginRef: least-queue-filter
- nextOnSuccessOrFailure:
- decisionTree:
- current:
- pluginRef: least-kv-cache-filter
- nextOnFailure:
- decisionTree:
- current:
- pluginRef: least-queue-filter
- nextOnSuccessOrFailure:
- decisionTree:
- current:
- pluginRef: lora-affinity-filter
- nextOnSuccessOrFailure:
- decisionTree:
- current:
- pluginRef: least-kv-cache-filter
- - type: random-picker
- parameters:
- maxNumOfEndpoints: 1
- - type: single-profile-handler
- schedulingProfiles:
- - name: default
- plugins:
- - pluginRef: low-latency-filter
- - pluginRef: random-picker
- plugins-v2.yaml: |
apiVersion: inference.networking.x-k8s.io/v1alpha1
kind: EndpointPickerConfig
plugins:
- type: queue-scorer
- - type: kv-cache-scorer
+ - type: kv-cache-utilization-scorer
- type: prefix-cache-scorer
- parameters:
- hashBlockSize: 64
- maxPrefixBlocksToMatch: 256
- lruCapacityPerServer: 31250
- - type: max-score-picker
- parameters:
- maxNumOfEndpoints: 1
- - type: single-profile-handler
schedulingProfiles:
- name: default
plugins:
- pluginRef: queue-scorer
- weight: 1
- - pluginRef: kv-cache-scorer
- weight: 1
+ - pluginRef: kv-cache-utilization-scorer
- pluginRef: prefix-cache-scorer
- weight: 1
- - pluginRef: max-score-picker
---
-kind: ClusterRole
+kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: pod-read
+ namespace: default
rules:
- apiGroups: ["inference.networking.x-k8s.io"]
- resources: ["inferencepools"]
+ resources: ["inferenceobjectives", "inferencepools"]
verbs: ["get", "watch", "list"]
- - apiGroups: ["inference.networking.x-k8s.io"]
- resources: ["inferencemodels"]
+ - apiGroups: ["inference.networking.k8s.io"]
+ resources: ["inferencepools"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list"]
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ name: pod-read-binding
+ namespace: default
+subjects:
+ - kind: ServiceAccount
+ name: mistral-epp
+ namespace: default
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: Role
+ name: pod-read
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ name: auth-reviewer
+rules:
- apiGroups:
- authentication.k8s.io
resources:
@@ -263,15 +232,15 @@ rules:
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
- name: pod-read-binding
+ name: auth-reviewer-binding
subjects:
- kind: ServiceAccount
- name: default
+ name: mistral-epp
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
- name: pod-read
+ name: auth-reviewer
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
diff --git a/examples/inference-pool/config.yaml b/examples/inference-pool/config.yaml
index 261c0fc817..3827de683e 100644
--- a/examples/inference-pool/config.yaml
+++ b/examples/inference-pool/config.yaml
@@ -42,9 +42,9 @@ data:
enableBackend: true
extensionManager:
backendResources:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
- version: v1alpha2
+ version: v1
hooks:
xdsTranslator:
translation:
diff --git a/examples/inference-pool/httproute.yaml b/examples/inference-pool/httproute.yaml
index ee4d3469f6..e68d873865 100644
--- a/examples/inference-pool/httproute.yaml
+++ b/examples/inference-pool/httproute.yaml
@@ -35,7 +35,7 @@ spec:
namespace: default
rules:
- backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
diff --git a/examples/inference-pool/with-annotations.yaml b/examples/inference-pool/with-annotations.yaml
index fd9488dcc9..0698a1f7bc 100644
--- a/examples/inference-pool/with-annotations.yaml
+++ b/examples/inference-pool/with-annotations.yaml
@@ -53,7 +53,7 @@ spec:
initialDelaySeconds: 1
periodSeconds: 1
---
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: mistral-with-annotations
@@ -68,22 +68,26 @@ metadata:
# This corresponds to the AllowModeOverride field in Envoy's ExternalProcessor
aigateway.envoyproxy.io/allow-mode-override: "true"
spec:
- targetPortNumber: 8080
+ targetPorts:
+ - number: 8080
selector:
- app: mistral-upstream
- extensionRef:
+ matchLabels:
+ app: mistral-upstream
+ endpointPickerRef:
name: mistral-epp-with-annotations
+ port:
+ number: 9002
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
-kind: InferenceModel
+kind: InferenceObjective
metadata:
name: mistral-with-annotations
namespace: default
spec:
- modelName: mistral:latest
- criticality: Critical
+ priority: 10
poolRef:
- # Bind the InferenceModel to the InferencePool.
+ # Bind the InferenceObjective to the InferencePool.
+ group: inference.networking.k8s.io
name: mistral-with-annotations
---
apiVersion: v1
diff --git a/go.mod b/go.mod
index e3687a0625..171847c2d5 100644
--- a/go.mod
+++ b/go.mod
@@ -67,7 +67,7 @@ require (
k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d
sigs.k8s.io/controller-runtime v0.22.1
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c
- sigs.k8s.io/gateway-api-inference-extension v0.5.1
+ sigs.k8s.io/gateway-api-inference-extension v1.0.1
sigs.k8s.io/yaml v1.6.0
)
diff --git a/go.sum b/go.sum
index da00b44242..1ac441db35 100644
--- a/go.sum
+++ b/go.sum
@@ -26,6 +26,9 @@ github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJ
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
+github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
+github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
+github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
@@ -344,10 +347,10 @@ github.com/ohler55/ojg v1.26.10/go.mod h1:/Y5dGWkekv9ocnUixuETqiL58f+5pAsUfg5P8e
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
-github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=
-github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
-github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
-github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
+github.com/onsi/ginkgo/v2 v2.24.0 h1:obZz8LAnHicNdbBqvG3ytAFx8fgza+i1IDpBVcHT2YE=
+github.com/onsi/ginkgo/v2 v2.24.0/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk=
+github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY=
+github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o=
github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
github.com/openai/openai-go/v2 v2.7.0 h1:/8MSFCXcasin7AyuWQ2au6FraXL71gzAs+VfbMv+J3k=
@@ -661,8 +664,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV
sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c h1:GS4VnGRV90GEUjrgQ2GT5ii6yzWj3KtgUg+sVMdhs5c=
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
-sigs.k8s.io/gateway-api-inference-extension v0.5.1 h1:OMpt4gKlPWkD+h5kHcZZVh4926kix2DSBPI7X5ntuCA=
-sigs.k8s.io/gateway-api-inference-extension v0.5.1/go.mod h1:lki0jx1qysZSZT4Ai2BxuAcpx6G8g5oBgOGuuJzjy/k=
+sigs.k8s.io/gateway-api-inference-extension v1.0.1 h1:n/zyxk/1RCT1nNoCdKiZsN7XTz9mTk3Cu1fWWbtZMBw=
+sigs.k8s.io/gateway-api-inference-extension v1.0.1/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/kubectl-validate v0.0.5-0.20250915070809-d2f2d68fba09 h1:JQbPOwLjSztom+aSDQIi6UZq8V0Gbv7BjAlYQSgycCI=
diff --git a/internal/controller/ai_gateway_route_test.go b/internal/controller/ai_gateway_route_test.go
index ca06fd6161..5ec9a25d8c 100644
--- a/internal/controller/ai_gateway_route_test.go
+++ b/internal/controller/ai_gateway_route_test.go
@@ -437,7 +437,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
Weight: ptr.To(int32(100)),
},
@@ -460,7 +460,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) {
// Check the first rule (our InferencePool rule).
backendRef := httpRoute.Spec.Rules[0].BackendRefs[0]
- require.Equal(t, "inference.networking.x-k8s.io", string(*backendRef.Group))
+ require.Equal(t, "inference.networking.k8s.io", string(*backendRef.Group))
require.Equal(t, "InferencePool", string(*backendRef.Kind))
require.Equal(t, "test-inference-pool", string(backendRef.Name))
require.Equal(t, "test-ns", string(*backendRef.Namespace))
diff --git a/internal/controller/controller.go b/internal/controller/controller.go
index b0abd2a8d3..1a4c9e2bb7 100644
--- a/internal/controller/controller.go
+++ b/internal/controller/controller.go
@@ -34,7 +34,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
gwapiv1a3 "sigs.k8s.io/gateway-api/apis/v1alpha3"
gwapiv1b1 "sigs.k8s.io/gateway-api/apis/v1beta1"
@@ -50,7 +50,7 @@ func init() {
utilruntime.Must(gwapiv1.Install(Scheme))
utilruntime.Must(gwapiv1a3.Install(Scheme))
utilruntime.Must(gwapiv1b1.Install(Scheme))
- utilruntime.Must(gwaiev1a2.Install(Scheme))
+ utilruntime.Must(gwaiev1.Install(Scheme))
}
// Scheme contains the necessary schemes for the AI Gateway.
@@ -161,7 +161,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con
if err != nil {
return fmt.Errorf("failed to create CRD client for inference extension: %w", err)
}
- const inferencePoolCRD = "inferencepools.inference.networking.x-k8s.io"
+ const inferencePoolCRD = "inferencepools.inference.networking.k8s.io"
if _, crdErr := crdClient.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, inferencePoolCRD, metav1.GetOptions{}); crdErr != nil {
if apierrors.IsNotFound(crdErr) {
logger.Info("InferencePool CRD not found, skipping InferencePool controller. " +
@@ -173,7 +173,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con
// CRD exists, create the controller.
inferencePoolC := NewInferencePoolController(c, kubernetes.NewForConfigOrDie(config), logger.
WithName("inference-pool"))
- if err = TypedControllerBuilderForCRD(mgr, &gwaiev1a2.InferencePool{}).
+ if err = TypedControllerBuilderForCRD(mgr, &gwaiev1.InferencePool{}).
Watches(&gwapiv1.Gateway{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.gatewayEventHandler)).
Watches(&aigv1a1.AIGatewayRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.aiGatewayRouteEventHandler)).
Watches(&gwapiv1.HTTPRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.httpRouteEventHandler)).
diff --git a/internal/controller/inference_pool.go b/internal/controller/inference_pool.go
index 47381b5962..825f924120 100644
--- a/internal/controller/inference_pool.go
+++ b/internal/controller/inference_pool.go
@@ -17,13 +17,13 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1"
)
-// InferencePoolController implements [reconcile.TypedReconciler] for [gwaiev1a2.InferencePool].
+// InferencePoolController implements [reconcile.TypedReconciler] for [gwaiev1.InferencePool].
//
// This handles the InferencePool resource and updates its status based on associated Gateways.
//
@@ -34,7 +34,7 @@ type InferencePoolController struct {
logger logr.Logger
}
-// NewInferencePoolController creates a new reconcile.TypedReconciler for gwaiev1a2.InferencePool.
+// NewInferencePoolController creates a new reconcile.TypedReconciler for gwaiev1.InferencePool.
func NewInferencePoolController(
client client.Client, kube kubernetes.Interface, logger logr.Logger,
) *InferencePoolController {
@@ -45,9 +45,9 @@ func NewInferencePoolController(
}
}
-// Reconcile implements the [reconcile.TypedReconciler] for [gwaiev1a2.InferencePool].
+// Reconcile implements the [reconcile.TypedReconciler] for [gwaiev1.InferencePool].
func (c *InferencePoolController) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
- var inferencePool gwaiev1a2.InferencePool
+ var inferencePool gwaiev1.InferencePool
if err := c.client.Get(ctx, req.NamespacedName, &inferencePool); err != nil {
if client.IgnoreNotFound(err) == nil {
c.logger.Info("Deleting InferencePool",
@@ -69,7 +69,7 @@ func (c *InferencePoolController) Reconcile(ctx context.Context, req reconcile.R
// syncInferencePool is the main logic for reconciling the InferencePool resource.
// This is decoupled from the Reconcile method to centralize the error handling and status updates.
-func (c *InferencePoolController) syncInferencePool(ctx context.Context, inferencePool *gwaiev1a2.InferencePool) error {
+func (c *InferencePoolController) syncInferencePool(ctx context.Context, inferencePool *gwaiev1.InferencePool) error {
// Check if the ExtensionReference service exists.
if err := c.validateExtensionReference(ctx, inferencePool); err != nil {
return err
@@ -97,7 +97,7 @@ func (c *InferencePoolController) routeReferencesInferencePool(route *aigv1a1.AI
}
// getReferencedGateways returns all Gateways that reference the given InferencePool.
-func (c *InferencePoolController) getReferencedGateways(ctx context.Context, inferencePool *gwaiev1a2.InferencePool) (map[string]*gwapiv1.Gateway, error) {
+func (c *InferencePoolController) getReferencedGateways(ctx context.Context, inferencePool *gwaiev1.InferencePool) (map[string]*gwapiv1.Gateway, error) {
// Find all Gateways across all namespaces.
var gateways gwapiv1.GatewayList
if err := c.client.List(ctx, &gateways); err != nil {
@@ -119,14 +119,9 @@ func (c *InferencePoolController) getReferencedGateways(ctx context.Context, inf
}
// validateExtensionReference checks if the ExtensionReference service exists.
-func (c *InferencePoolController) validateExtensionReference(ctx context.Context, inferencePool *gwaiev1a2.InferencePool) error {
- // Check if ExtensionRef is specified.
- if inferencePool.Spec.ExtensionRef == nil {
- return nil // No extension reference to validate.
- }
-
+func (c *InferencePoolController) validateExtensionReference(ctx context.Context, inferencePool *gwaiev1.InferencePool) error {
// Get the service name from ExtensionReference.
- serviceName := inferencePool.Spec.ExtensionRef.Name
+ serviceName := inferencePool.Spec.EndpointPickerRef.Name
if serviceName == "" {
return fmt.Errorf("ExtensionReference name is empty")
}
@@ -218,7 +213,7 @@ func (c *InferencePoolController) routeReferencesGateway(parentRefs []gwapiv1.Pa
func (c *InferencePoolController) httpRouteReferencesInferencePool(route *gwapiv1.HTTPRoute, inferencePoolName string) bool {
for _, rule := range route.Spec.Rules {
for _, backendRef := range rule.BackendRefs {
- if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.x-k8s.io" &&
+ if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.k8s.io" &&
backendRef.Kind != nil && string(*backendRef.Kind) == "InferencePool" &&
string(backendRef.Name) == inferencePoolName {
return true
@@ -229,7 +224,7 @@ func (c *InferencePoolController) httpRouteReferencesInferencePool(route *gwapiv
}
// updateInferencePoolStatus updates the status of the InferencePool.
-func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context, inferencePool *gwaiev1a2.InferencePool, conditionType string, message string) {
+func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context, inferencePool *gwaiev1.InferencePool, conditionType string, message string) {
// Check if this is an ExtensionReference validation error.
isExtensionRefError := conditionType == "NotAccepted" &&
(strings.Contains(message, "ExtensionReference service") && strings.Contains(message, "not found"))
@@ -241,17 +236,17 @@ func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context,
}
// Build Parents status.
- var parents []gwaiev1a2.PoolStatus
+ var parents []gwaiev1.ParentStatus
for _, gw := range referencedGateways {
// Set Gateway group and kind according to Gateway API defaults.
gatewayGroup := "gateway.networking.k8s.io"
gatewayKind := "Gateway"
- parentRef := gwaiev1a2.ParentGatewayReference{
- Group: (*gwaiev1a2.Group)(&gatewayGroup),
- Kind: (*gwaiev1a2.Kind)(&gatewayKind),
- Name: gwaiev1a2.ObjectName(gw.Name),
- Namespace: (*gwaiev1a2.Namespace)(&gw.Namespace),
+ parentRef := gwaiev1.ParentReference{
+ Group: (*gwaiev1.Group)(&gatewayGroup),
+ Kind: gwaiev1.Kind(gatewayKind),
+ Name: gwaiev1.ObjectName(gw.Name),
+ Namespace: gwaiev1.Namespace(gw.Namespace),
}
var conditions []metav1.Condition
@@ -270,8 +265,8 @@ func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context,
conditions = append(conditions, resolvedRefsCondition)
}
- parents = append(parents, gwaiev1a2.PoolStatus{
- GatewayRef: parentRef,
+ parents = append(parents, gwaiev1.ParentStatus{
+ ParentRef: parentRef,
Conditions: conditions,
})
}
@@ -313,7 +308,7 @@ func (c *InferencePoolController) gatewayEventHandler(ctx context.Context, obj c
}
// Find all InferencePools in the same namespace that might be affected by this Gateway.
- var inferencePools gwaiev1a2.InferencePoolList
+ var inferencePools gwaiev1.InferencePoolList
if err := c.client.List(ctx, &inferencePools, client.InNamespace(gateway.Namespace)); err != nil {
c.logger.Error(err, "failed to list InferencePools for Gateway event", "gateway", gateway.Name)
return nil
@@ -371,7 +366,7 @@ func (c *InferencePoolController) httpRouteEventHandler(_ context.Context, obj c
var requests []reconcile.Request
for _, rule := range route.Spec.Rules {
for _, backendRef := range rule.BackendRefs {
- if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.x-k8s.io" &&
+ if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.k8s.io" &&
backendRef.Kind != nil && string(*backendRef.Kind) == "InferencePool" {
requests = append(requests, reconcile.Request{
NamespacedName: client.ObjectKey{
diff --git a/internal/controller/inference_pool_test.go b/internal/controller/inference_pool_test.go
index 1cf03f4bfb..161056ff91 100644
--- a/internal/controller/inference_pool_test.go
+++ b/internal/controller/inference_pool_test.go
@@ -17,7 +17,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1"
@@ -28,7 +28,7 @@ func requireNewFakeClientWithIndexesAndInferencePool(t *testing.T) client.Client
WithStatusSubresource(&aigv1a1.AIGatewayRoute{}).
WithStatusSubresource(&aigv1a1.AIServiceBackend{}).
WithStatusSubresource(&aigv1a1.BackendSecurityPolicy{}).
- WithStatusSubresource(&gwaiev1a2.InferencePool{})
+ WithStatusSubresource(&gwaiev1.InferencePool{})
err := ApplyIndexing(t.Context(), func(_ context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error {
builder = builder.WithIndex(obj, field, extractValue)
return nil
@@ -42,22 +42,18 @@ func TestInferencePoolController_ExtensionReferenceValidation(t *testing.T) {
c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log)
// Create an InferencePool with ExtensionReference pointing to a non-existent service.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "non-existent-service",
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "non-existent-service",
},
},
}
@@ -75,7 +71,7 @@ func TestInferencePoolController_ExtensionReferenceValidation(t *testing.T) {
require.Equal(t, ctrl.Result{}, result)
// Check that the InferencePool status was updated with ResolvedRefs condition.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool",
Namespace: "default",
@@ -107,22 +103,18 @@ func TestInferencePoolController_ExtensionReferenceValidationSuccess(t *testing.
require.NoError(t, fakeClient.Create(context.Background(), service))
// Create an InferencePool with ExtensionReference pointing to the existing service.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "existing-service",
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "existing-service",
},
},
}
@@ -139,7 +131,7 @@ func TestInferencePoolController_ExtensionReferenceValidationSuccess(t *testing.
require.Equal(t, ctrl.Result{}, result)
// Check that the InferencePool status was updated successfully.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool",
Namespace: "default",
@@ -198,7 +190,7 @@ func TestInferencePoolController_Reconcile(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
Weight: ptr.To(int32(100)),
},
@@ -210,22 +202,18 @@ func TestInferencePoolController_Reconcile(t *testing.T) {
require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute))
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "test-epp",
},
},
}
@@ -242,7 +230,7 @@ func TestInferencePoolController_Reconcile(t *testing.T) {
require.Equal(t, ctrl.Result{}, result)
// Check that the InferencePool status was updated.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool",
Namespace: "default",
@@ -252,10 +240,10 @@ func TestInferencePoolController_Reconcile(t *testing.T) {
require.Len(t, updatedInferencePool.Status.Parents, 1)
parent := updatedInferencePool.Status.Parents[0]
- require.Equal(t, "gateway.networking.k8s.io", string(*parent.GatewayRef.Group))
- require.Equal(t, "Gateway", string(*parent.GatewayRef.Kind))
- require.Equal(t, "test-gateway", string(parent.GatewayRef.Name))
- require.Equal(t, "default", string(*parent.GatewayRef.Namespace))
+ require.Equal(t, "gateway.networking.k8s.io", string(*parent.ParentRef.Group))
+ require.Equal(t, "Gateway", string(parent.ParentRef.Kind))
+ require.Equal(t, "test-gateway", string(parent.ParentRef.Name))
+ require.Equal(t, "default", string(parent.ParentRef.Namespace))
// Verify that the conditions are set correctly.
require.Len(t, parent.Conditions, 2, "Should have both Accepted and ResolvedRefs conditions")
@@ -303,22 +291,18 @@ func TestInferencePoolController_NoReferencingGateways(t *testing.T) {
require.NoError(t, fakeClient.Create(context.Background(), service))
// Create an InferencePool without any referencing AIGatewayRoutes.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "test-epp",
},
},
}
@@ -335,7 +319,7 @@ func TestInferencePoolController_NoReferencingGateways(t *testing.T) {
require.Equal(t, ctrl.Result{}, result)
// Check that the InferencePool status was updated.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool",
Namespace: "default",
@@ -410,7 +394,7 @@ func TestInferencePoolController_HTTPRouteReferencesInferencePool(t *testing.T)
{
BackendRef: gwapiv1.BackendRef{
BackendObjectReference: gwapiv1.BackendObjectReference{
- Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")),
+ Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")),
Kind: ptr.To(gwapiv1.Kind("InferencePool")),
Name: "test-inference-pool",
},
@@ -529,7 +513,7 @@ func TestInferencePoolController_GatewayReferencesInferencePool(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -557,16 +541,16 @@ func TestInferencePoolController_gatewayEventHandler(t *testing.T) {
c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log)
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
},
}
require.NoError(t, fakeClient.Create(context.Background(), inferencePool))
@@ -588,7 +572,7 @@ func TestInferencePoolController_gatewayEventHandler(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -632,7 +616,7 @@ func TestInferencePoolController_aiGatewayRouteEventHandler(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -666,7 +650,7 @@ func TestInferencePoolController_httpRouteEventHandler(t *testing.T) {
{
BackendRef: gwapiv1.BackendRef{
BackendObjectReference: gwapiv1.BackendObjectReference{
- Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")),
+ Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")),
Kind: ptr.To(gwapiv1.Kind("InferencePool")),
Name: "test-inference-pool",
},
@@ -700,48 +684,19 @@ func TestInferencePoolController_EdgeCases(t *testing.T) {
require.NoError(t, err, "Should not error when InferencePool doesn't exist")
require.Equal(t, ctrl.Result{}, result)
- // Test InferencePool without ExtensionRef.
- inferencePoolNoExtRef := &gwaiev1a2.InferencePool{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-inference-pool-no-ext",
- Namespace: "default",
- },
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
- "app": "test-app",
- },
- TargetPortNumber: 8080,
- // No ExtensionRef.
- },
- }
- require.NoError(t, fakeClient.Create(context.Background(), inferencePoolNoExtRef))
-
- result, err = c.Reconcile(context.Background(), ctrl.Request{
- NamespacedName: client.ObjectKey{
- Name: "test-inference-pool-no-ext",
- Namespace: "default",
- },
- })
- require.NoError(t, err, "Should not error when InferencePool has no ExtensionRef")
- require.Equal(t, ctrl.Result{}, result)
-
// Test InferencePool with empty ExtensionRef name.
- inferencePoolEmptyExtRef := &gwaiev1a2.InferencePool{
+ inferencePoolEmptyExtRef := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-empty-ext",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "", // Empty name.
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "", // Empty name.
},
},
}
@@ -792,7 +747,7 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -819,22 +774,18 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) {
require.NoError(t, fakeClient.Create(context.Background(), service))
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "test-epp",
},
},
}
@@ -851,7 +802,7 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) {
require.Equal(t, ctrl.Result{}, result)
// Check that the InferencePool status was updated with the cross-namespace Gateway.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool",
Namespace: "default",
@@ -861,10 +812,10 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) {
require.Len(t, updatedInferencePool.Status.Parents, 1)
parent := updatedInferencePool.Status.Parents[0]
- require.Equal(t, "gateway.networking.k8s.io", string(*parent.GatewayRef.Group))
- require.Equal(t, "Gateway", string(*parent.GatewayRef.Kind))
- require.Equal(t, "test-gateway", string(parent.GatewayRef.Name))
- require.Equal(t, "gateway-namespace", string(*parent.GatewayRef.Namespace))
+ require.Equal(t, "gateway.networking.k8s.io", string(*parent.ParentRef.Group))
+ require.Equal(t, "Gateway", string(parent.ParentRef.Kind))
+ require.Equal(t, "test-gateway", string(parent.ParentRef.Name))
+ require.Equal(t, "gateway-namespace", string(parent.ParentRef.Namespace))
}
func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) {
@@ -900,7 +851,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -911,17 +862,17 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) {
require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute))
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
Generation: 5, // Set a specific generation for testing.
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
},
}
require.NoError(t, fakeClient.Create(context.Background(), inferencePool))
@@ -930,7 +881,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) {
c.updateInferencePoolStatus(context.Background(), inferencePool, "NotAccepted", "test error message")
// Check that the status was updated.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool",
Namespace: "default",
@@ -940,7 +891,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) {
require.Len(t, updatedInferencePool.Status.Parents, 1)
parent := updatedInferencePool.Status.Parents[0]
- require.Equal(t, "test-gateway", string(parent.GatewayRef.Name))
+ require.Equal(t, "test-gateway", string(parent.ParentRef.Name))
require.Len(t, parent.Conditions, 2, "Should have both Accepted and ResolvedRefs conditions")
// Find the conditions.
@@ -970,16 +921,16 @@ func TestInferencePoolController_GetReferencedGateways_ErrorHandling(t *testing.
c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log)
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
},
}
@@ -1000,7 +951,7 @@ func TestInferencePoolController_GetReferencedGateways_ErrorHandling(t *testing.
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -1052,7 +1003,7 @@ func TestInferencePoolController_GatewayReferencesInferencePool_HTTPRoute(t *tes
{
BackendRef: gwapiv1.BackendRef{
BackendObjectReference: gwapiv1.BackendObjectReference{
- Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")),
+ Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")),
Kind: ptr.To(gwapiv1.Kind("InferencePool")),
Name: "test-inference-pool",
},
@@ -1081,45 +1032,6 @@ func TestInferencePoolController_GatewayReferencesInferencePool_HTTPRoute(t *tes
func TestInferencePoolController_ValidateExtensionReference_EdgeCases(t *testing.T) {
fakeClient := requireNewFakeClientWithIndexesAndInferencePool(t)
c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log)
-
- // Test with nil ExtensionRef.
- inferencePoolNilExt := &gwaiev1a2.InferencePool{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-inference-pool-nil-ext",
- Namespace: "default",
- },
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
- "app": "test-app",
- },
- TargetPortNumber: 8080,
- // No EndpointPickerConfig.
- },
- }
-
- err := c.validateExtensionReference(context.Background(), inferencePoolNilExt)
- require.NoError(t, err, "Should not error when ExtensionRef is nil")
-
- // Test with ExtensionRef but nil ExtensionRef field.
- inferencePoolNilExtRef := &gwaiev1a2.InferencePool{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-inference-pool-nil-extref",
- Namespace: "default",
- },
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
- "app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- // ExtensionRef is nil.
- },
- },
- }
-
- err = c.validateExtensionReference(context.Background(), inferencePoolNilExtRef)
- require.NoError(t, err, "Should not error when ExtensionRef field is nil")
-
// Test with service in different namespace (should fail).
serviceOtherNS := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
@@ -1136,27 +1048,23 @@ func TestInferencePoolController_ValidateExtensionReference_EdgeCases(t *testing
}
require.NoError(t, fakeClient.Create(context.Background(), serviceOtherNS))
- inferencePoolOtherNS := &gwaiev1a2.InferencePool{
+ inferencePoolOtherNS := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-other-ns",
Namespace: "default", // InferencePool in default namespace.
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "service-other-ns", // Service in other-namespace.
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "service-other-ns", // Refers to service in other-namespace.
},
},
}
- err = c.validateExtensionReference(context.Background(), inferencePoolOtherNS)
+ err := c.validateExtensionReference(context.Background(), inferencePoolOtherNS)
require.Error(t, err, "Should error when ExtensionReference service is in different namespace")
require.Contains(t, err.Error(), "ExtensionReference service service-other-ns not found in namespace default")
}
@@ -1166,22 +1074,18 @@ func TestInferencePoolController_Reconcile_ErrorHandling(t *testing.T) {
c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log)
// Test reconcile with InferencePool that has empty ExtensionRef name.
- inferencePoolEmptyName := &gwaiev1a2.InferencePool{
+ inferencePoolEmptyName := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-empty-name",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "", // Empty name.
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "", // Empty name.
},
},
}
@@ -1199,22 +1103,18 @@ func TestInferencePoolController_Reconcile_ErrorHandling(t *testing.T) {
require.Equal(t, ctrl.Result{}, result)
// Test reconcile with InferencePool that has non-existent ExtensionRef service.
- inferencePoolNonExistentService := &gwaiev1a2.InferencePool{
+ inferencePoolNonExistentService := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-non-existent",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "non-existent-service",
- },
- },
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "non-existent-service",
},
},
}
@@ -1237,16 +1137,16 @@ func TestInferencePoolController_SyncInferencePool_EdgeCases(t *testing.T) {
c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log)
// Test syncInferencePool with InferencePool that has no referenced gateways.
- inferencePoolNoGateways := &gwaiev1a2.InferencePool{
+ inferencePoolNoGateways := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-no-gateways",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
},
}
require.NoError(t, fakeClient.Create(context.Background(), inferencePoolNoGateways))
@@ -1266,13 +1166,8 @@ func TestInferencePoolController_SyncInferencePool_EdgeCases(t *testing.T) {
},
}
require.NoError(t, fakeClient.Create(context.Background(), service))
-
- inferencePoolNoGateways.Spec.EndpointPickerConfig = gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp-no-gateways",
- },
- },
+ inferencePoolNoGateways.Spec.EndpointPickerRef = gwaiev1.EndpointPickerRef{
+ Name: "test-epp-no-gateways",
}
require.NoError(t, fakeClient.Update(context.Background(), inferencePoolNoGateways))
@@ -1287,7 +1182,7 @@ func TestInferencePoolController_SyncInferencePool_EdgeCases(t *testing.T) {
require.Equal(t, ctrl.Result{}, result)
// Check that the InferencePool status is empty (no parents).
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool-no-gateways",
Namespace: "default",
@@ -1301,16 +1196,16 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi
c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log)
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-complex",
Namespace: "default",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
},
}
@@ -1354,7 +1249,7 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool-complex",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -1381,7 +1276,7 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool-complex",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -1411,7 +1306,7 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi
{
BackendRef: gwapiv1.BackendRef{
BackendObjectReference: gwapiv1.BackendObjectReference{
- Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")),
+ Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")),
Kind: ptr.To(gwapiv1.Kind("InferencePool")),
Name: "test-inference-pool-complex",
},
@@ -1484,7 +1379,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t *t
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool-multi",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -1510,7 +1405,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t *t
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool-multi",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -1521,17 +1416,17 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t *t
require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute2))
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-multi",
Namespace: "default",
Generation: 10, // Set a specific generation for testing.
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
},
}
require.NoError(t, fakeClient.Create(context.Background(), inferencePool))
@@ -1540,7 +1435,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t *t
c.updateInferencePoolStatus(context.Background(), inferencePool, "Accepted", "all references resolved")
// Check that the status was updated for both gateways.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool-multi",
Namespace: "default",
@@ -1613,7 +1508,7 @@ func TestInferencePoolController_GatewayReferencesInferencePool_NoRoutes(t *test
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -1688,7 +1583,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t *
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool-ext-error",
- Group: ptr.To("inference.networking.x-k8s.io"),
+ Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
},
},
@@ -1699,17 +1594,17 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t *
require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute))
// Create an InferencePool.
- inferencePool := &gwaiev1a2.InferencePool{
+ inferencePool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-inference-pool-ext-error",
Namespace: "default",
Generation: 15, // Set a specific generation for testing.
},
- Spec: gwaiev1a2.InferencePoolSpec{
- Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{
+ Spec: gwaiev1.InferencePoolSpec{
+ Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{
"app": "test-app",
- },
- TargetPortNumber: 8080,
+ }},
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
},
}
require.NoError(t, fakeClient.Create(context.Background(), inferencePool))
@@ -1719,7 +1614,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t *
c.updateInferencePoolStatus(context.Background(), inferencePool, "NotAccepted", extRefErrorMessage)
// Check that the status was updated with ExtensionReference error.
- var updatedInferencePool gwaiev1a2.InferencePool
+ var updatedInferencePool gwaiev1.InferencePool
require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{
Name: "test-inference-pool-ext-error",
Namespace: "default",
@@ -1729,7 +1624,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t *
require.Len(t, updatedInferencePool.Status.Parents, 1)
parent := updatedInferencePool.Status.Parents[0]
- require.Equal(t, "test-gateway-ext-error", string(parent.GatewayRef.Name))
+ require.Equal(t, "test-gateway-ext-error", string(parent.ParentRef.Name))
require.Len(t, parent.Conditions, 2, "Should have both Accepted and ResolvedRefs conditions")
// Find the conditions.
diff --git a/internal/extensionserver/extensionserver_test.go b/internal/extensionserver/extensionserver_test.go
index 2a83efb36c..48fe405a3e 100644
--- a/internal/extensionserver/extensionserver_test.go
+++ b/internal/extensionserver/extensionserver_test.go
@@ -35,7 +35,7 @@ import (
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1"
"github.com/envoyproxy/ai-gateway/internal/controller"
@@ -186,7 +186,7 @@ func Test_maybeModifyCluster(t *testing.T) {
func createInferencePoolExtensionResource(name, namespace string) *egextension.ExtensionResource {
unstructuredObj := &unstructured.Unstructured{
Object: map[string]any{
- "apiVersion": "inference.networking.x-k8s.io/v1alpha2",
+ "apiVersion": "inference.networking.k8s.io/v1",
"kind": "InferencePool",
"metadata": map[string]any{
"name": name,
@@ -617,20 +617,16 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
s := New(newFakeClient(), logr.Discard(), udsPath, false)
// Helper function to create an InferencePool.
- createInferencePool := func(name, namespace string) *gwaiev1a2.InferencePool {
- return &gwaiev1a2.InferencePool{
+ createInferencePool := func(name, namespace string) *gwaiev1.InferencePool {
+ return &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
},
- Spec: gwaiev1a2.InferencePoolSpec{
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "test-epp",
},
},
}
@@ -659,7 +655,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
listener := &listenerv3.Listener{
Name: "test-listener",
}
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchListenerWithInferencePoolFilters(listener, pools)
// Should handle gracefully when no filter chains exist.
@@ -679,7 +675,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
},
},
}
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
server.patchListenerWithInferencePoolFilters(listener, pools)
require.Contains(t, buf.String(), "failed to find an HCM in the current chain")
@@ -692,7 +688,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
}
listener := createListenerWithHCM("test-listener", existingFilters)
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchListenerWithInferencePoolFilters(listener, pools)
@@ -709,7 +705,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
}
listener := createListenerWithHCM("test-listener", existingFilters)
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchListenerWithInferencePoolFilters(listener, pools)
@@ -728,7 +724,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
}
listener := createListenerWithHCM("test-listener", existingFilters)
- pools := []*gwaiev1a2.InferencePool{
+ pools := []*gwaiev1.InferencePool{
createInferencePool("pool1", "test-ns"),
createInferencePool("pool2", "test-ns"),
}
@@ -774,7 +770,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
},
}
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchListenerWithInferencePoolFilters(listener, pools)
@@ -802,7 +798,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) {
listener := createListenerWithHCM("test-listener", []*httpconnectionmanagerv3.HttpFilter{
{Name: "envoy.filters.http.router"},
})
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
server.patchListenerWithInferencePoolFilters(listener, pools)
// This test mainly ensures the error handling path is covered.
@@ -815,27 +811,23 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
s := New(newFakeClient(), logr.Discard(), udsPath, false)
// Helper function to create an InferencePool.
- createInferencePool := func(name, namespace string) *gwaiev1a2.InferencePool {
- return &gwaiev1a2.InferencePool{
+ createInferencePool := func(name, namespace string) *gwaiev1.InferencePool {
+ return &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
},
- Spec: gwaiev1a2.InferencePoolSpec{
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "test-epp",
},
},
}
}
// Helper function to create a route with InferencePool metadata.
- createRouteWithInferencePool := func(routeName string, pool *gwaiev1a2.InferencePool) *routev3.Route {
+ createRouteWithInferencePool := func(routeName string, pool *gwaiev1.InferencePool) *routev3.Route {
metadata := &corev3.Metadata{
FilterMetadata: map[string]*structpb.Struct{
internalapi.InternalEndpointMetadataNamespace: {
@@ -859,7 +851,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
Name: "test-vh",
Routes: []*routev3.Route{},
}
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchVirtualHostWithInferencePool(vh, pools)
// Should handle gracefully when no routes exist.
@@ -873,7 +865,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
Name: "test-vh",
Routes: []*routev3.Route{normalRoute},
}
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchVirtualHostWithInferencePool(vh, pools)
@@ -891,7 +883,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
Name: "test-vh",
Routes: []*routev3.Route{inferenceRoute},
}
- pools := []*gwaiev1a2.InferencePool{pool}
+ pools := []*gwaiev1.InferencePool{pool}
s.patchVirtualHostWithInferencePool(vh, pools)
@@ -914,7 +906,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
Name: "test-vh",
Routes: []*routev3.Route{inferenceRoute},
}
- pools := []*gwaiev1a2.InferencePool{pool1, pool2}
+ pools := []*gwaiev1.InferencePool{pool1, pool2}
s.patchVirtualHostWithInferencePool(vh, pools)
@@ -946,7 +938,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
Name: "test-vh",
Routes: []*routev3.Route{directResponseRoute},
}
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchVirtualHostWithInferencePool(vh, pools)
@@ -972,7 +964,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
Name: "test-vh",
Routes: []*routev3.Route{directResponseRoute},
}
- pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")}
+ pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")}
s.patchVirtualHostWithInferencePool(vh, pools)
@@ -994,7 +986,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) {
Name: "test-vh",
Routes: []*routev3.Route{normalRoute, inferenceRoute1, inferenceRoute2},
}
- pools := []*gwaiev1a2.InferencePool{pool1, pool2}
+ pools := []*gwaiev1.InferencePool{pool1, pool2}
s.patchVirtualHostWithInferencePool(vh, pools)
@@ -1209,19 +1201,15 @@ func TestConstructInferencePoolsFrom(t *testing.T) {
// TestInferencePoolHelperFunctions tests various helper functions for InferencePool.
func TestInferencePoolHelperFunctions(t *testing.T) {
// Create a test InferencePool.
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: "test-epp",
},
},
}
@@ -1253,8 +1241,8 @@ func TestInferencePoolHelperFunctions(t *testing.T) {
t.Run("portForInferencePool custom", func(t *testing.T) {
customPool := pool.DeepCopy()
- customPort := gwaiev1a2.PortNumber(8888)
- customPool.Spec.ExtensionRef.PortNumber = &customPort
+ customPort := gwaiev1.PortNumber(8888)
+ customPool.Spec.EndpointPickerRef.Port = &gwaiev1.Port{Number: customPort}
port := portForInferencePool(customPool)
require.Equal(t, uint32(8888), port)
})
@@ -1264,7 +1252,7 @@ func TestInferencePoolHelperFunctions(t *testing.T) {
func TestInferencePoolAnnotationHelpers(t *testing.T) {
t.Run("getProcessingBodyModeFromAnnotations", func(t *testing.T) {
t.Run("no annotations", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1275,7 +1263,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to duplex", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1289,7 +1277,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to buffered", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1303,7 +1291,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to invalid value", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1319,7 +1307,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
t.Run("getAllowModeOverrideFromAnnotations", func(t *testing.T) {
t.Run("no annotations", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1330,7 +1318,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to true", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1344,7 +1332,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to false", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1358,7 +1346,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to invalid value", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1374,7 +1362,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
t.Run("getProcessingBodyModeStringFromAnnotations", func(t *testing.T) {
t.Run("no annotations", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1385,7 +1373,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to duplex", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1399,7 +1387,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to buffered", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1413,7 +1401,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to invalid value", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1429,7 +1417,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
t.Run("getAllowModeOverrideStringFromAnnotations", func(t *testing.T) {
t.Run("no annotations", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1440,7 +1428,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to true", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1454,7 +1442,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to false", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1468,7 +1456,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
})
t.Run("annotation set to invalid value", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1486,19 +1474,13 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) {
// TestBuildHTTPFilterForInferencePool tests the buildHTTPFilterForInferencePool function with annotations.
func TestBuildHTTPFilterForInferencePool(t *testing.T) {
t.Run("default configuration", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"},
},
}
@@ -1512,7 +1494,7 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) {
})
t.Run("with buffered mode annotation", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1520,14 +1502,8 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) {
"aigateway.envoyproxy.io/processing-body-mode": "buffered",
},
},
- Spec: gwaiev1a2.InferencePoolSpec{
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"},
},
}
@@ -1541,7 +1517,7 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) {
})
t.Run("with allow mode override annotation", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1549,14 +1525,8 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) {
"aigateway.envoyproxy.io/allow-mode-override": "true",
},
},
- Spec: gwaiev1a2.InferencePoolSpec{
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"},
},
}
@@ -1570,7 +1540,7 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) {
})
t.Run("with both annotations", func(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
@@ -1579,14 +1549,8 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) {
"aigateway.envoyproxy.io/allow-mode-override": "true",
},
},
- Spec: gwaiev1a2.InferencePoolSpec{
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"},
},
}
@@ -1602,20 +1566,14 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) {
// TestBuildExtProcClusterForInferencePoolEndpointPicker tests cluster building.
func TestBuildExtProcClusterForInferencePoolEndpointPicker(t *testing.T) {
- pool := &gwaiev1a2.InferencePool{
+ pool := &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Namespace: "test-ns",
},
- Spec: gwaiev1a2.InferencePoolSpec{
- TargetPortNumber: 8080,
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: "test-epp",
- },
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ TargetPorts: []gwaiev1.Port{{Number: 8080}},
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"},
},
}
@@ -1634,14 +1592,6 @@ func TestBuildExtProcClusterForInferencePoolEndpointPicker(t *testing.T) {
buildExtProcClusterForInferencePoolEndpointPicker(nil)
})
})
-
- t.Run("nil ExtensionRef panics", func(t *testing.T) {
- invalidPool := pool.DeepCopy()
- invalidPool.Spec.ExtensionRef = nil
- require.Panics(t, func() {
- buildExtProcClusterForInferencePoolEndpointPicker(invalidPool)
- })
- })
}
// TestBuildClustersForInferencePoolEndpointPickers tests building clusters from existing clusters.
diff --git a/internal/extensionserver/inferencepool.go b/internal/extensionserver/inferencepool.go
index 6911bb34cb..aaef644a46 100644
--- a/internal/extensionserver/inferencepool.go
+++ b/internal/extensionserver/inferencepool.go
@@ -31,7 +31,7 @@ import (
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/utils/ptr"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"github.com/envoyproxy/ai-gateway/internal/internalapi"
)
@@ -53,11 +53,11 @@ const (
allowModeOverrideAnnotation = "aigateway.envoyproxy.io/allow-mode-override"
)
-func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.ExtensionResource) []*gwaiev1a2.InferencePool {
+func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.ExtensionResource) []*gwaiev1.InferencePool {
// Parse InferencePool resources from BackendExtensionResources.
// BackendExtensionResources contains unstructured Kubernetes resources that were
// referenced in the AIGatewayRoute's BackendRefs with non-empty Group and Kind fields.
- var inferencePools []*gwaiev1a2.InferencePool
+ var inferencePools []*gwaiev1.InferencePool
for _, resource := range extensionResources {
// Unmarshal the unstructured bytes to get the Kubernetes resource.
// The resource is stored as JSON bytes in the extension context.
@@ -69,11 +69,11 @@ func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.E
// Check if this is an InferencePool resource from the Gateway API Inference Extension.
// We only process InferencePool resources; other extension resources are ignored.
- if unstructuredObj.GetAPIVersion() == "inference.networking.x-k8s.io/v1alpha2" &&
+ if unstructuredObj.GetAPIVersion() == "inference.networking.k8s.io/v1" &&
unstructuredObj.GetKind() == "InferencePool" {
// Convert unstructured object to strongly-typed InferencePool.
// This allows us to access the InferencePool's spec fields safely.
- var pool gwaiev1a2.InferencePool
+ var pool gwaiev1.InferencePool
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredObj.Object, &pool); err != nil {
s.log.Error(err, "failed to convert unstructured to InferencePool",
"name", unstructuredObj.GetName(), "namespace", unstructuredObj.GetNamespace())
@@ -87,7 +87,7 @@ func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.E
}
// getInferencePoolByMetadata returns the InferencePool from the cluster metadata.
-func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool {
+func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1.InferencePool {
var metadata string
if meta != nil && meta.FilterMetadata != nil {
m, ok := meta.FilterMetadata[internalapi.InternalEndpointMetadataNamespace]
@@ -112,7 +112,7 @@ func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool
}
processingBodyMode := result[4]
allowModeOverride := result[5]
- return &gwaiev1a2.InferencePool{
+ return &gwaiev1.InferencePool{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns,
@@ -121,14 +121,10 @@ func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool
allowModeOverrideAnnotation: allowModeOverride,
},
},
- Spec: gwaiev1a2.InferencePoolSpec{
- EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{
- ExtensionRef: &gwaiev1a2.Extension{
- ExtensionReference: gwaiev1a2.ExtensionReference{
- Name: gwaiev1a2.ObjectName(serviceName),
- PortNumber: ptr.To(gwaiev1a2.PortNumber(port)),
- },
- },
+ Spec: gwaiev1.InferencePoolSpec{
+ EndpointPickerRef: gwaiev1.EndpointPickerRef{
+ Name: gwaiev1.ObjectName(serviceName),
+ Port: ptr.To(gwaiev1.Port{Number: gwaiev1.PortNumber(port)}),
},
},
}
@@ -136,19 +132,19 @@ func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool
// buildMetadataForInferencePool adds InferencePool metadata to the cluster for reference by other components.
// encoded as a string in the format: "namespace/name/serviceName/port".
-func buildEPPMetadataForCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1a2.InferencePool) {
+func buildEPPMetadataForCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1.InferencePool) {
// Initialize cluster metadata structure if not present.
buildEPPMetadata(cluster.Metadata, inferencePool)
}
// buildMetadataForInferencePool adds InferencePool metadata to the route for reference by other components.
-func buildEPPMetadataForRoute(route *routev3.Route, inferencePool *gwaiev1a2.InferencePool) {
+func buildEPPMetadataForRoute(route *routev3.Route, inferencePool *gwaiev1.InferencePool) {
// Initialize route metadata structure if not present.
buildEPPMetadata(route.Metadata, inferencePool)
}
// buildEPPMetadata adds InferencePool metadata to the given metadata structure.
-func buildEPPMetadata(metadata *corev3.Metadata, inferencePool *gwaiev1a2.InferencePool) {
+func buildEPPMetadata(metadata *corev3.Metadata, inferencePool *gwaiev1.InferencePool) {
// Initialize cluster metadata structure if not present.
if metadata == nil {
metadata = &corev3.Metadata{}
@@ -178,7 +174,7 @@ func buildEPPMetadata(metadata *corev3.Metadata, inferencePool *gwaiev1a2.Infere
clusterRefInferencePool(
inferencePool.Namespace,
inferencePool.Name,
- string(inferencePool.Spec.ExtensionRef.Name),
+ string(inferencePool.Spec.EndpointPickerRef.Name),
portForInferencePool(inferencePool),
processingBodyMode,
allowModeOverride,
@@ -201,13 +197,10 @@ func buildClustersForInferencePoolEndpointPickers(clusters []*clusterv3.Cluster)
// buildExtProcClusterForInferencePoolEndpointPicker builds and returns a "STRICT_DNS" cluster
// for connecting to the InferencePool's endpoint picker service.
-func buildExtProcClusterForInferencePoolEndpointPicker(pool *gwaiev1a2.InferencePool) *clusterv3.Cluster {
+func buildExtProcClusterForInferencePoolEndpointPicker(pool *gwaiev1.InferencePool) *clusterv3.Cluster {
if pool == nil {
panic("InferencePool cannot be nil")
}
- if pool.Spec.ExtensionRef == nil {
- panic("InferencePool ExtensionRef cannot be nil")
- }
name := clusterNameForInferencePool(pool)
c := &clusterv3.Cluster{
@@ -276,7 +269,7 @@ func buildExtProcClusterForInferencePoolEndpointPicker(pool *gwaiev1a2.Inference
}
// buildInferencePoolHTTPFilter returns a HTTP filter for InferencePool.
-func buildInferencePoolHTTPFilter(pool *gwaiev1a2.InferencePool) *httpconnectionmanagerv3.HttpFilter {
+func buildInferencePoolHTTPFilter(pool *gwaiev1.InferencePool) *httpconnectionmanagerv3.HttpFilter {
poolFilter := buildHTTPFilterForInferencePool(pool)
return &httpconnectionmanagerv3.HttpFilter{
Name: httpFilterNameForInferencePool(pool),
@@ -285,7 +278,7 @@ func buildInferencePoolHTTPFilter(pool *gwaiev1a2.InferencePool) *httpconnection
}
// buildHTTPFilterForInferencePool returns the HTTP filter for the given InferencePool.
-func buildHTTPFilterForInferencePool(pool *gwaiev1a2.InferencePool) *extprocv3.ExternalProcessor {
+func buildHTTPFilterForInferencePool(pool *gwaiev1.InferencePool) *extprocv3.ExternalProcessor {
// Read processing body mode from annotations, default to "duplex" (FULL_DUPLEX_STREAMED)
processingBodyMode := getProcessingBodyModeFromAnnotations(pool)
@@ -317,7 +310,7 @@ func buildHTTPFilterForInferencePool(pool *gwaiev1a2.InferencePool) *extprocv3.E
// getProcessingBodyModeFromAnnotations reads the processing body mode from InferencePool annotations.
// Returns FULL_DUPLEX_STREAMED for "duplex" (default) or BUFFERED for "buffered".
-func getProcessingBodyModeFromAnnotations(pool *gwaiev1a2.InferencePool) extprocv3.ProcessingMode_BodySendMode {
+func getProcessingBodyModeFromAnnotations(pool *gwaiev1.InferencePool) extprocv3.ProcessingMode_BodySendMode {
annotations := pool.GetAnnotations()
if annotations == nil {
return extprocv3.ProcessingMode_FULL_DUPLEX_STREAMED // default to duplex
@@ -341,7 +334,7 @@ func getProcessingBodyModeFromAnnotations(pool *gwaiev1a2.InferencePool) extproc
// getAllowModeOverrideFromAnnotations reads the allow mode override setting from InferencePool annotations.
// Returns false by default, true if annotation is set to "true".
-func getAllowModeOverrideFromAnnotations(pool *gwaiev1a2.InferencePool) bool {
+func getAllowModeOverrideFromAnnotations(pool *gwaiev1.InferencePool) bool {
annotations := pool.GetAnnotations()
if annotations == nil {
return false // default to false
@@ -356,7 +349,7 @@ func getAllowModeOverrideFromAnnotations(pool *gwaiev1a2.InferencePool) bool {
}
// getProcessingBodyModeStringFromAnnotations reads the processing body mode from InferencePool annotations.
-func getProcessingBodyModeStringFromAnnotations(pool *gwaiev1a2.InferencePool) string {
+func getProcessingBodyModeStringFromAnnotations(pool *gwaiev1.InferencePool) string {
annotations := pool.GetAnnotations()
if annotations == nil {
return "duplex" // default to duplex
@@ -371,7 +364,7 @@ func getProcessingBodyModeStringFromAnnotations(pool *gwaiev1a2.InferencePool) s
}
// getAllowModeOverrideStringFromAnnotations reads the allow mode override setting from InferencePool annotations.
-func getAllowModeOverrideStringFromAnnotations(pool *gwaiev1a2.InferencePool) string {
+func getAllowModeOverrideStringFromAnnotations(pool *gwaiev1.InferencePool) string {
annotations := pool.GetAnnotations()
if annotations == nil {
return "false" // default to false
@@ -386,39 +379,39 @@ func getAllowModeOverrideStringFromAnnotations(pool *gwaiev1a2.InferencePool) st
}
// authorityForInferencePool formats the gRPC authority based on the given InferencePool.
-func authorityForInferencePool(pool *gwaiev1a2.InferencePool) string {
+func authorityForInferencePool(pool *gwaiev1.InferencePool) string {
ns := pool.GetNamespace()
- svc := pool.Spec.ExtensionRef.Name
+ svc := pool.Spec.EndpointPickerRef.Name
return fmt.Sprintf("%s.%s.svc:%d", svc, ns, portForInferencePool(pool))
}
// dnsNameForInferencePool formats the DNS name based on the given InferencePool.
-func dnsNameForInferencePool(pool *gwaiev1a2.InferencePool) string {
+func dnsNameForInferencePool(pool *gwaiev1.InferencePool) string {
ns := pool.GetNamespace()
- svc := pool.Spec.ExtensionRef.Name
+ svc := pool.Spec.EndpointPickerRef.Name
return fmt.Sprintf("%s.%s.svc", svc, ns)
}
// portForInferencePool returns the port number for the given InferencePool.
-func portForInferencePool(pool *gwaiev1a2.InferencePool) uint32 {
- if p := pool.Spec.ExtensionRef.PortNumber; p == nil {
+func portForInferencePool(pool *gwaiev1.InferencePool) uint32 {
+ if p := pool.Spec.EndpointPickerRef.Port; p == nil {
return defaultEndpointPickerPort
}
- portNumber := *pool.Spec.ExtensionRef.PortNumber
+ portNumber := pool.Spec.EndpointPickerRef.Port.Number
if portNumber < 0 || portNumber > 65535 {
return defaultEndpointPickerPort // fallback to default port.
}
// Safe conversion: portNumber is validated to be in range [0, 65535].
- return uint32(portNumber) // #nosec G115
+ return uint32(portNumber) // #nosec G115
}
// clusterNameForInferencePool returns the name of the ext_proc cluster for the given InferencePool.
-func clusterNameForInferencePool(pool *gwaiev1a2.InferencePool) string {
+func clusterNameForInferencePool(pool *gwaiev1.InferencePool) string {
return fmt.Sprintf("envoy.clusters.endpointpicker_%s_%s_ext_proc", pool.GetName(), pool.GetNamespace())
}
// httpFilterNameForInferencePool returns the name of the ext_proc cluster for the given InferencePool.
-func httpFilterNameForInferencePool(pool *gwaiev1a2.InferencePool) string {
+func httpFilterNameForInferencePool(pool *gwaiev1.InferencePool) string {
return fmt.Sprintf("envoy.filters.http.ext_proc/endpointpicker/%s_%s_ext_proc", pool.GetName(), pool.GetNamespace())
}
@@ -440,7 +433,7 @@ func findHCM(filterChain *listenerv3.FilterChain) (*httpconnectionmanagerv3.Http
}
// Tries to find the inference pool ext proc filter in the provided chain.
-func searchInferencePoolInFilterChain(pool *gwaiev1a2.InferencePool, chain []*httpconnectionmanagerv3.HttpFilter) (*extprocv3.ExternalProcessor, int, error) {
+func searchInferencePoolInFilterChain(pool *gwaiev1.InferencePool, chain []*httpconnectionmanagerv3.HttpFilter) (*extprocv3.ExternalProcessor, int, error) {
for i, filter := range chain {
if filter.Name == httpFilterNameForInferencePool(pool) {
ep := new(extprocv3.ExternalProcessor)
diff --git a/internal/extensionserver/post_cluster_modify.go b/internal/extensionserver/post_cluster_modify.go
index f33940edd2..6accb327d1 100644
--- a/internal/extensionserver/post_cluster_modify.go
+++ b/internal/extensionserver/post_cluster_modify.go
@@ -13,7 +13,7 @@ import (
egextension "github.com/envoyproxy/gateway/proto/extension"
clusterv3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
"google.golang.org/protobuf/types/known/durationpb"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"github.com/envoyproxy/ai-gateway/internal/internalapi"
)
@@ -64,7 +64,7 @@ func (s *Server) PostClusterModify(_ context.Context, req *egextension.PostClust
//
// The ORIGINAL_DST cluster type tells Envoy to route requests to the destination specified
// in the x-gateway-destination-endpoint header, enabling dynamic endpoint selection by the EPP.
-func (s *Server) handleInferencePoolCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1a2.InferencePool) {
+func (s *Server) handleInferencePoolCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1.InferencePool) {
// Configure cluster for ORIGINAL_DST with header-based load balancing.
// ORIGINAL_DST type allows Envoy to route to destinations specified in HTTP headers.
cluster.ClusterDiscoveryType = &clusterv3.Cluster_Type{Type: clusterv3.Cluster_ORIGINAL_DST}
diff --git a/internal/extensionserver/post_translate_modify.go b/internal/extensionserver/post_translate_modify.go
index aed9511fe8..1595a66bf5 100644
--- a/internal/extensionserver/post_translate_modify.go
+++ b/internal/extensionserver/post_translate_modify.go
@@ -30,7 +30,7 @@ import (
"google.golang.org/protobuf/types/known/wrapperspb"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1"
"github.com/envoyproxy/ai-gateway/internal/internalapi"
@@ -364,14 +364,14 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener,
// inferencePoolRoutes builds a matrix of route configs and the inference pools they use.
routeNameToRoute := make(map[string]*routev3.RouteConfiguration)
- routeNameToVHRouteNameToInferencePool := make(map[string]map[string]*gwaiev1a2.InferencePool)
+ routeNameToVHRouteNameToInferencePool := make(map[string]map[string]*gwaiev1.InferencePool)
for _, routeCfg := range routes {
routeNameToRoute[routeCfg.Name] = routeCfg
for _, vh := range routeCfg.VirtualHosts {
for _, route := range vh.Routes {
if pool := getInferencePoolByMetadata(route.Metadata); pool != nil {
if routeNameToVHRouteNameToInferencePool[routeCfg.Name] == nil {
- routeNameToVHRouteNameToInferencePool[routeCfg.Name] = make(map[string]*gwaiev1a2.InferencePool)
+ routeNameToVHRouteNameToInferencePool[routeCfg.Name] = make(map[string]*gwaiev1.InferencePool)
}
routeNameToVHRouteNameToInferencePool[routeCfg.Name][route.Name] = pool
}
@@ -380,7 +380,7 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener,
}
// listenerToInferencePools builds a matrix of listeners and the inference pools they use.
- listenerToInferencePools := make(map[string][]*gwaiev1a2.InferencePool)
+ listenerToInferencePools := make(map[string][]*gwaiev1.InferencePool)
for listener, routeCfgNames := range listenerNameToRouteNames {
for _, name := range routeCfgNames {
if routeNameToRoute[name] == nil {
@@ -391,7 +391,7 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener,
}
for _, pool := range routeNameToVHRouteNameToInferencePool[name] {
if listenerToInferencePools[listener] == nil {
- listenerToInferencePools[listener] = make([]*gwaiev1a2.InferencePool, 0)
+ listenerToInferencePools[listener] = make([]*gwaiev1.InferencePool, 0)
}
listenerToInferencePools[listener] = append(listenerToInferencePools[listener], pool)
}
@@ -433,7 +433,7 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener,
}
// patchListenerWithInferencePoolFilters adds the necessary HTTP filters to the listener to support InferencePool backends.
-func (s *Server) patchListenerWithInferencePoolFilters(listener *listenerv3.Listener, inferencePools []*gwaiev1a2.InferencePool) {
+func (s *Server) patchListenerWithInferencePoolFilters(listener *listenerv3.Listener, inferencePools []*gwaiev1.InferencePool) {
// First, get the filter chains from the listener.
filterChains := listener.GetFilterChains()
defaultFC := listener.DefaultFilterChain
@@ -476,8 +476,8 @@ func (s *Server) patchListenerWithInferencePoolFilters(listener *listenerv3.List
}
// patchVirtualHostWithInferencePool adds the necessary per-route configuration to disable.
-func (s *Server) patchVirtualHostWithInferencePool(vh *routev3.VirtualHost, inferencePools []*gwaiev1a2.InferencePool) {
- inferenceMatrix := make(map[string]*gwaiev1a2.InferencePool)
+func (s *Server) patchVirtualHostWithInferencePool(vh *routev3.VirtualHost, inferencePools []*gwaiev1.InferencePool) {
+ inferenceMatrix := make(map[string]*gwaiev1.InferencePool)
for _, pool := range inferencePools {
inferenceMatrix[httpFilterNameForInferencePool(pool)] = pool
}
diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml
index 7802b8c694..d1085c795b 100644
--- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml
+++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml
@@ -451,7 +451,7 @@ spec:
description: |-
Group is the group of the backend resource.
When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
- Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
+ Currently, only "inference.networking.k8s.io" is supported for InferencePool resources.
maxLength: 253
pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
@@ -505,9 +505,9 @@ spec:
- message: group and kind must be specified together
rule: '!has(self.group) && !has(self.kind) || (has(self.group)
&& has(self.kind))'
- - message: only InferencePool from inference.networking.x-k8s.io
+ - message: only InferencePool from inference.networking.k8s.io
group is supported
- rule: '!has(self.group) || (self.group == ''inference.networking.x-k8s.io''
+ rule: '!has(self.group) || (self.group == ''inference.networking.k8s.io''
&& self.kind == ''InferencePool'')'
maxItems: 128
type: array
diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml
index 7802b8c694..d1085c795b 100644
--- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml
+++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml
@@ -451,7 +451,7 @@ spec:
description: |-
Group is the group of the backend resource.
When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
- Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
+ Currently, only "inference.networking.k8s.io" is supported for InferencePool resources.
maxLength: 253
pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
@@ -505,9 +505,9 @@ spec:
- message: group and kind must be specified together
rule: '!has(self.group) && !has(self.kind) || (has(self.group)
&& has(self.kind))'
- - message: only InferencePool from inference.networking.x-k8s.io
+ - message: only InferencePool from inference.networking.k8s.io
group is supported
- rule: '!has(self.group) || (self.group == ''inference.networking.x-k8s.io''
+ rule: '!has(self.group) || (self.group == ''inference.networking.k8s.io''
&& self.kind == ''InferencePool'')'
maxItems: 128
type: array
diff --git a/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml b/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml
index 4d62d63483..ac37d2ddc5 100644
--- a/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml
+++ b/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml
@@ -35,7 +35,7 @@ rules:
verbs:
- '*'
- apiGroups:
- - inference.networking.x-k8s.io
+ - inference.networking.k8s.io
resources:
- '*'
verbs:
diff --git a/manifests/envoy-gateway-config/rbac.yaml b/manifests/envoy-gateway-config/rbac.yaml
index 79eb2b7c44..beb515e667 100644
--- a/manifests/envoy-gateway-config/rbac.yaml
+++ b/manifests/envoy-gateway-config/rbac.yaml
@@ -20,7 +20,7 @@ rules:
- "list"
- "watch"
- apiGroups:
- - "inference.networking.x-k8s.io"
+ - "inference.networking.k8s.io"
resources:
- "inferencepools"
verbs:
diff --git a/site/blog/2025/2025-07-30-epp-introduction.md b/site/blog/2025/2025-07-30-epp-introduction.md
index 38c7a60670..17fe960f23 100644
--- a/site/blog/2025/2025-07-30-epp-introduction.md
+++ b/site/blog/2025/2025-07-30-epp-introduction.md
@@ -90,7 +90,7 @@ spec:
namespace: default
rules:
- backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
@@ -133,7 +133,7 @@ spec:
name: x-ai-eg-model
value: meta-llama/Llama-3.1-8B-Instruct
backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
- matches:
@@ -142,7 +142,7 @@ spec:
name: x-ai-eg-model
value: mistral:latest
backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: mistral
- matches:
@@ -201,11 +201,11 @@ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extens
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/vllm/sim-deployment.yaml
```
-### 3\. Configure InferenceModel and InferencePool
+### 3\. Configure InferenceObjective and InferencePool
```yaml
-apiVersion: inference.networking.x-k8s.io/v1alpha2
-kind: InferenceModel
+apiVersion: inference.networking.k8s.io/v1
+kind: InferenceObjective
metadata:
name: base-model
spec:
@@ -214,7 +214,7 @@ spec:
poolRef:
name: vllm-llama3-8b-instruct
---
-apiVersion: inference.networking.x-k8s.io/v1alpha2
+apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
diff --git a/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio b/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio
index 002043a215..fe265eba9a 100644
--- a/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio
+++ b/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio
@@ -424,10 +424,10 @@
-
+
-
+
@@ -462,10 +462,10 @@
-
+
-
+
diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx
index 3387704a34..56f2a5abf3 100644
--- a/site/docs/api/api.mdx
+++ b/site/docs/api/api.mdx
@@ -552,7 +552,7 @@ It can reference either an AIServiceBackend or an InferencePool resource.
name="group"
type="string"
required="false"
- description="Group is the group of the backend resource.
When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
Currently, only `inference.networking.x-k8s.io` is supported for InferencePool resources."
+ description="Group is the group of the backend resource.
When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
Currently, only `inference.networking.k8s.io` is supported for InferencePool resources."
/> **Note**: These deployments create the `vllm-llama3-8b-instruct` InferencePool and related resources that are referenced in the AIGatewayRoute configuration below.
-## Step 3: Create EndpointPicker Resources
+## Step 3: Create Custom InferencePool Resources
-Create the base resources for the example, including additional inference backends:
+Create additional inference backends with custom EndpointPicker configuration:
```yaml
cat < **Note**: This deployment creates the `vllm-llama3-8b-instruct` InferencePool and related resources that are referenced in the HTTPRoute configuration below.
-## Step 3: Create InferenceModel
+## Step 3: Create InferenceObjective
-Create an InferenceModel resource to define the model configuration:
+Create an InferenceObjective resource to define the model configuration:
```bash
-kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencemodel.yaml
+kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.0.1/config/manifests/inferenceobjective.yaml
```
## Step 4: Create InferencePool Resources
@@ -60,14 +60,15 @@ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extens
Deploy the InferencePool and related resources:
```bash
-kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencepool-resources.yaml
+kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.1/config/manifests/inferencepool-resources.yaml
```
This creates:
- InferencePool resource defining the endpoint selection criteria
-- Endpoint Picker Provider (EPP) deployment for intelligent routing
+- Endpoint Picker Provider (EPP) deployment for intelligent routing with advanced scheduling plugins
- Associated services and configurations
+- RBAC permissions for accessing InferencePool and Pod resources
## Step 5: Configure Gateway and HTTPRoute
@@ -107,10 +108,11 @@ spec:
namespace: default
rules:
- backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
+ port: 8080
weight: 1
matches:
- path:
diff --git a/tests/crdcel/main_test.go b/tests/crdcel/main_test.go
index 408352958e..8130622875 100644
--- a/tests/crdcel/main_test.go
+++ b/tests/crdcel/main_test.go
@@ -51,7 +51,7 @@ func TestAIGatewayRoutes(t *testing.T) {
},
{
name: "inference_pool_unsupported_group.yaml",
- expErr: "spec.rules[0].backendRefs[0]: Invalid value: \"object\": only InferencePool from inference.networking.x-k8s.io group is supported",
+ expErr: "spec.rules[0].backendRefs[0]: Invalid value: \"object\": only InferencePool from inference.networking.k8s.io group is supported",
},
} {
t.Run(tc.name, func(t *testing.T) {
diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml
index 285042b651..fe15989506 100644
--- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml
+++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml
@@ -13,11 +13,8 @@ spec:
parentRefs:
- name: gateway
rules:
- - matches:
- - path:
- type: PathPrefix
- value: /v1/chat/completions
+ - matches: []
backendRefs:
- - group: inference.networking.x-k8s.io
+ - group: inference.networking.k8s.io
kind: InferencePool
name: my-inference-pool
diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml
index cc130935e6..2038b980d5 100644
--- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml
+++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml
@@ -24,5 +24,5 @@ spec:
backendRefs:
- name: ai-service-backend
- name: vllm-llama3-8b-instruct
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
kind: InferencePool
diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml
index 64a6c0949c..3931176aee 100644
--- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml
+++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml
@@ -23,8 +23,8 @@ spec:
value: llama3-8b
backendRefs:
- name: vllm-llama3-8b-instruct-1
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
kind: InferencePool
- name: vllm-llama3-8b-instruct-2
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
kind: InferencePool
diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml
index dfd9213f37..145d186278 100644
--- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml
+++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml
@@ -23,5 +23,5 @@ spec:
value: llama3-8b
backendRefs:
- name: vllm-llama3-8b-instruct
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
# Missing kind field
diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml
index 12bee37490..4b927c621d 100644
--- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml
+++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml
@@ -3,7 +3,7 @@
# The full text of the Apache license is available in the LICENSE file at
# the root of the repo.
-# This should fail validation: only InferencePool from inference.networking.x-k8s.io group is supported
+# This should fail validation: only InferencePool from inference.networking.k8s.io group is supported
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml
index 27f3293376..d512c49371 100644
--- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml
+++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml
@@ -21,5 +21,5 @@ spec:
value: llama3-8b
backendRefs:
- name: vllm-llama3-8b-instruct
- group: inference.networking.x-k8s.io
+ group: inference.networking.k8s.io
kind: InferencePool
diff --git a/tests/e2e-inference-extension/conformance_test.go b/tests/e2e-inference-extension/conformance_test.go
index f54cad3e6e..a67e879827 100644
--- a/tests/e2e-inference-extension/conformance_test.go
+++ b/tests/e2e-inference-extension/conformance_test.go
@@ -6,6 +6,8 @@
package e2e
import (
+ "fmt"
+ "os"
"testing"
"time"
@@ -40,10 +42,19 @@ func TestGatewayAPIInferenceExtension(t *testing.T) {
config.SetupTimeoutConfig(&defaultTimeoutConfig)
options.TimeoutConfig = defaultTimeoutConfig
options.GatewayClassName = "inference-pool"
- // enable EPPUnAvaliableFailOpen after https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/1265 merged.
- options.SkipTests = []string{
- "EppUnAvailableFailOpen",
- }
+ options.SkipTests = []string{}
+
+ // Setup cleanup to print report even if test fails
+ t.Cleanup(func() {
+ if content, err := os.ReadFile(options.ReportOutputPath); err != nil {
+ t.Logf("Failed to read conformance report file %s: %v", options.ReportOutputPath, err)
+ } else {
+ fmt.Printf("\n=== CONFORMANCE TEST REPORT (CLEANUP) ===\n")
+ fmt.Printf("Report file: %s\n", options.ReportOutputPath)
+ fmt.Printf("Content:\n%s\n", string(content))
+ fmt.Printf("=== END OF REPORT (CLEANUP) ===\n\n")
+ }
+ })
gie.RunConformanceWithOptions(t, options)
}
diff --git a/tests/e2e-inference-extension/inference_pool_test.go b/tests/e2e-inference-extension/inference_pool_test.go
index 5f129d45ca..306d0bf2cd 100644
--- a/tests/e2e-inference-extension/inference_pool_test.go
+++ b/tests/e2e-inference-extension/inference_pool_test.go
@@ -18,7 +18,7 @@ import (
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+ gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"github.com/envoyproxy/ai-gateway/tests/internal/e2elib"
)
@@ -176,14 +176,14 @@ func testInferenceGatewayConnectivity(t *testing.T, egSelector, body string, add
}
// getInferencePoolStatus retrieves the status of an InferencePool resource.
-func getInferencePoolStatus(ctx context.Context, namespace, name string) (*gwaiev1a2.InferencePoolStatus, error) {
+func getInferencePoolStatus(ctx context.Context, namespace, name string) (*gwaiev1.InferencePoolStatus, error) {
cmd := exec.CommandContext(ctx, "kubectl", "get", "inferencepool", name, "-n", namespace, "-o", "json")
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("failed to get InferencePool %s/%s: %w", namespace, name, err)
}
- var inferencePool gwaiev1a2.InferencePool
+ var inferencePool gwaiev1.InferencePool
if err := json.Unmarshal(out, &inferencePool); err != nil {
return nil, fmt.Errorf("failed to unmarshal InferencePool: %w", err)
}
@@ -207,10 +207,10 @@ func requireInferencePoolStatusValid(t *testing.T, namespace, inferencePoolName,
}
// Find the parent status for the expected Gateway.
- var foundParent *gwaiev1a2.PoolStatus
+ var foundParent *gwaiev1.ParentStatus
for i := range status.Parents {
parent := &status.Parents[i]
- if string(parent.GatewayRef.Name) == expectedGatewayName {
+ if string(parent.ParentRef.Name) == expectedGatewayName {
foundParent = parent
break
}
@@ -222,23 +222,23 @@ func requireInferencePoolStatusValid(t *testing.T, namespace, inferencePoolName,
}
// Validate the GatewayRef fields.
- if foundParent.GatewayRef.Group == nil || string(*foundParent.GatewayRef.Group) != "gateway.networking.k8s.io" {
- t.Logf("InferencePool %s parent GatewayRef has incorrect group: %v", inferencePoolName, foundParent.GatewayRef.Group)
+ if foundParent.ParentRef.Group == nil || string(*foundParent.ParentRef.Group) != "gateway.networking.k8s.io" {
+ t.Logf("InferencePool %s parent ParentRef has incorrect group: %v", inferencePoolName, foundParent.ParentRef.Group)
return false
}
- if foundParent.GatewayRef.Kind == nil || string(*foundParent.GatewayRef.Kind) != "Gateway" {
- t.Logf("InferencePool %s parent GatewayRef has incorrect kind: %v", inferencePoolName, foundParent.GatewayRef.Kind)
+ if string(foundParent.ParentRef.Kind) != "Gateway" {
+ t.Logf("InferencePool %s parent ParentRef has incorrect kind: %v", inferencePoolName, foundParent.ParentRef.Kind)
return false
}
- if string(foundParent.GatewayRef.Name) != expectedGatewayName {
- t.Logf("InferencePool %s parent GatewayRef has incorrect name: %s (expected %s)", inferencePoolName, foundParent.GatewayRef.Name, expectedGatewayName)
+ if string(foundParent.ParentRef.Name) != expectedGatewayName {
+ t.Logf("InferencePool %s parent ParentRef has incorrect name: %s (expected %s)", inferencePoolName, foundParent.ParentRef.Name, expectedGatewayName)
return false
}
- if foundParent.GatewayRef.Namespace == nil || string(*foundParent.GatewayRef.Namespace) != namespace {
- t.Logf("InferencePool %s parent GatewayRef has incorrect namespace: %v (expected %s)", inferencePoolName, foundParent.GatewayRef.Namespace, namespace)
+ if string(foundParent.ParentRef.Namespace) != namespace {
+ t.Logf("InferencePool %s parent ParentRef has incorrect namespace: %v (expected %s)", inferencePoolName, foundParent.ParentRef.Namespace, namespace)
return false
}
diff --git a/tests/internal/e2elib/e2elib.go b/tests/internal/e2elib/e2elib.go
index 61efab6d32..b9f5b3aed3 100644
--- a/tests/internal/e2elib/e2elib.go
+++ b/tests/internal/e2elib/e2elib.go
@@ -347,7 +347,7 @@ func CleanupKindCluster(testsFailed bool, clusterName string) {
}
func installInferenceExtensionCRD(ctx context.Context) (err error) {
- const infExtURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml"
+ const infExtURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml"
return KubectlApplyManifest(ctx, infExtURL)
}
@@ -357,12 +357,12 @@ func installVLLMDeployment(ctx context.Context) (err error) {
}
func installInferenceModel(ctx context.Context) (err error) {
- const inferenceModelURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencemodel.yaml"
+ const inferenceModelURL = "https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.0.1/config/manifests/inferenceobjective.yaml"
return KubectlApplyManifest(ctx, inferenceModelURL)
}
func installInferencePoolResources(ctx context.Context) (err error) {
- const inferencePoolURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencepool-resources.yaml"
+ const inferencePoolURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.1/config/manifests/inferencepool-resources.yaml"
return KubectlApplyManifest(ctx, inferencePoolURL)
}