diff --git a/.golangci.yml b/.golangci.yml index 9f095e94e1..ba05b762db 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -44,8 +44,8 @@ linters: alias: egv1a1 - pkg: github.com/envoyproxy/ai-gateway/api/v1alpha1 alias: aigv1a1 - - pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2 - alias: gwaiev1a2 + - pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1 + alias: gwaiev1 - pkg: k8s.io/apimachinery/pkg/apis/meta/v1 alias: metav1 - pkg: k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1 diff --git a/api/v1alpha1/ai_gateway_route.go b/api/v1alpha1/ai_gateway_route.go index 3eba1315c2..bcc8c958f5 100644 --- a/api/v1alpha1/ai_gateway_route.go +++ b/api/v1alpha1/ai_gateway_route.go @@ -266,7 +266,7 @@ type AIGatewayRouteRule struct { // It can reference either an AIServiceBackend or an InferencePool resource. // // +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together" -// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported" +// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.k8s.io group is supported" type AIGatewayRouteRuleBackendRef struct { // Name is the name of the backend resource. // When Group and Kind are not specified, this refers to an AIServiceBackend. @@ -278,7 +278,7 @@ type AIGatewayRouteRuleBackendRef struct { // Group is the group of the backend resource. // When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend). - // Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources. 
+ // Currently, only "inference.networking.k8s.io" is supported for InferencePool resources. // // +optional // +kubebuilder:validation:MaxLength=253 diff --git a/api/v1alpha1/ai_gateway_route_helper.go b/api/v1alpha1/ai_gateway_route_helper.go index a09891ab57..7779a2269f 100644 --- a/api/v1alpha1/ai_gateway_route_helper.go +++ b/api/v1alpha1/ai_gateway_route_helper.go @@ -15,7 +15,7 @@ const ( defaultRequestTimeout gwapiv1.Duration = "60s" // inferencePoolGroup is the API group for InferencePool resources. - inferencePoolGroup = "inference.networking.x-k8s.io" + inferencePoolGroup = "inference.networking.k8s.io" // inferencePoolKind is the kind for InferencePool resources. inferencePoolKind = "InferencePool" ) diff --git a/cmd/aigw/envoy-gateway-config.yaml b/cmd/aigw/envoy-gateway-config.yaml index 391bf17d24..ea0a22c4de 100644 --- a/cmd/aigw/envoy-gateway-config.yaml +++ b/cmd/aigw/envoy-gateway-config.yaml @@ -24,7 +24,7 @@ extensionApis: enableBackend: true extensionManager: backendResources: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool - version: v1alpha2 + version: v1 hooks: diff --git a/docs/proposals/003-epp-integration-proposal/proposal.md b/docs/proposals/003-epp-integration-proposal/proposal.md index 1cfca6f866..da8d0c5147 100644 --- a/docs/proposals/003-epp-integration-proposal/proposal.md +++ b/docs/proposals/003-epp-integration-proposal/proposal.md @@ -51,7 +51,7 @@ When request goes to envoyproxy, it goes to the http filter chain, the ext-proc The gRPC service info is pre-defined in [InferencePool](https://gateway-api-inference-extension.sigs.k8s.io/api-types/inferencepool/) extensionRef, giving an example below: ``` -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: vllm-llama3-8b-instruct @@ -81,7 +81,7 @@ spec: name: inference-gateway rules: - backendRefs: - - group: inference.networking.x-k8s.io + - group: 
inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct matches: @@ -209,7 +209,7 @@ This requires to expand the `AIGatewayRouteRuleBackendRef` with `BackendObjectRe - When it matches vllm-llama3-8b-instruct goes to InferencePool `vllm-llama3-8b-instruct` ``` -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: vllm-llama3-8b-instruct @@ -249,7 +249,7 @@ spec: value: vllm-llama3-8b-instruct backendRefs: - name: vllm-llama3-8b-instruct - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io kind: InferencePool ``` @@ -269,7 +269,7 @@ This approach is preferred because InferencePool resources do not require Backen - When it matches vllm-llama3-8b-instruct goes to AIServiceBackend `vllm-llama3-8b-instruct` ```yaml -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: vllm-llama3-8b-instruct @@ -319,7 +319,7 @@ spec: name: OpenAI backendRef: name: vllm-llama3-8b-instruct - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io kind: InferencePool ``` @@ -384,7 +384,7 @@ It adds the the cluster with override_host loadBalancingPolicy, we can add the h Take the configuration below as an example: ```yaml -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: vllm-llama3-8b-instruct @@ -417,7 +417,7 @@ spec: value: vllm-llama3-8b-instruct backendRefs: - name: vllm-llama3-8b-instruct - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io kind: InferencePool ``` @@ -582,7 +582,7 @@ spec: name: x-ai-eg-model value: meta-llama/Llama-3.1-8B-Instruct backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct - matches: @@ -591,7 +591,7 @@ spec: name: x-ai-eg-model value: mistral:latest 
backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: mistral - matches: @@ -619,7 +619,7 @@ spec: namespace: default rules: - backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct namespace: default diff --git a/examples/inference-pool/aigwroute.yaml b/examples/inference-pool/aigwroute.yaml index 88daa78d30..6e11dd6c7c 100644 --- a/examples/inference-pool/aigwroute.yaml +++ b/examples/inference-pool/aigwroute.yaml @@ -49,7 +49,7 @@ spec: name: Authorization value: sk-zyxwvutsrqponmlkjihgfedcba backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct - matches: @@ -58,7 +58,7 @@ spec: name: x-ai-eg-model value: mistral:latest backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: mistral - matches: diff --git a/examples/inference-pool/base.yaml b/examples/inference-pool/base.yaml index fde5878f15..a1979d3399 100644 --- a/examples/inference-pool/base.yaml +++ b/examples/inference-pool/base.yaml @@ -49,31 +49,40 @@ spec: initialDelaySeconds: 1 periodSeconds: 1 --- -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: mistral namespace: default spec: - targetPortNumber: 8080 + targetPorts: + - number: 8080 selector: - app: mistral-upstream - extensionRef: + matchLabels: + app: mistral-upstream + endpointPickerRef: name: mistral-epp + port: + number: 9002 --- apiVersion: inference.networking.x-k8s.io/v1alpha2 -kind: InferenceModel +kind: InferenceObjective metadata: name: mistral namespace: default spec: - modelName: mistral:latest - criticality: Critical + priority: 10 poolRef: - # Bind the InferenceModel to the InferencePool. + # Bind the InferenceObjective to the InferencePool. 
name: mistral --- apiVersion: v1 +kind: ServiceAccount +metadata: + name: mistral-epp + namespace: default +--- +apiVersion: v1 kind: Service metadata: name: mistral-epp @@ -105,26 +114,27 @@ spec: labels: app: mistral-epp spec: + serviceAccountName: mistral-epp # Conservatively, this timeout should mirror the longest grace period of the pods within the pool terminationGracePeriodSeconds: 130 containers: - name: epp - image: registry.k8s.io/gateway-api-inference-extension/epp:v0.5.1 + image: registry.k8s.io/gateway-api-inference-extension/epp:v1.0.1 imagePullPolicy: IfNotPresent args: - - -poolName + - --pool-name - "mistral" - - "-poolNamespace" + - "--pool-namespace" - "default" - - -v + - --v - "4" - --zap-encoder - "json" - - -grpcPort + - --grpc-port - "9002" - - -grpcHealthPort + - --grpc-health-port - "9003" - - "-configFile" + - "--config-file" - "/config/default-plugins.yaml" ports: - containerPort: 9002 @@ -158,95 +168,54 @@ metadata: namespace: default data: default-plugins.yaml: | - apiVersion: inference.networking.x-k8s.io/v1alpha1 - kind: EndpointPickerConfig - plugins: - - type: low-queue-filter - parameters: - threshold: 128 - - type: lora-affinity-filter - parameters: - threshold: 0.999 - - type: least-queue-filter - - type: least-kv-cache-filter - - type: decision-tree-filter - name: low-latency-filter - parameters: - current: - pluginRef: low-queue-filter - nextOnSuccess: - decisionTree: - current: - pluginRef: lora-affinity-filter - nextOnSuccessOrFailure: - decisionTree: - current: - pluginRef: least-queue-filter - nextOnSuccessOrFailure: - decisionTree: - current: - pluginRef: least-kv-cache-filter - nextOnFailure: - decisionTree: - current: - pluginRef: least-queue-filter - nextOnSuccessOrFailure: - decisionTree: - current: - pluginRef: lora-affinity-filter - nextOnSuccessOrFailure: - decisionTree: - current: - pluginRef: least-kv-cache-filter - - type: random-picker - parameters: - maxNumOfEndpoints: 1 - - type: single-profile-handler - 
schedulingProfiles: - - name: default - plugins: - - pluginRef: low-latency-filter - - pluginRef: random-picker - plugins-v2.yaml: | apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: EndpointPickerConfig plugins: - type: queue-scorer - - type: kv-cache-scorer + - type: kv-cache-utilization-scorer - type: prefix-cache-scorer - parameters: - hashBlockSize: 64 - maxPrefixBlocksToMatch: 256 - lruCapacityPerServer: 31250 - - type: max-score-picker - parameters: - maxNumOfEndpoints: 1 - - type: single-profile-handler schedulingProfiles: - name: default plugins: - pluginRef: queue-scorer - weight: 1 - - pluginRef: kv-cache-scorer - weight: 1 + - pluginRef: kv-cache-utilization-scorer - pluginRef: prefix-cache-scorer - weight: 1 - - pluginRef: max-score-picker --- -kind: ClusterRole +kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: name: pod-read + namespace: default rules: - apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencepools"] + resources: ["inferenceobjectives", "inferencepools"] verbs: ["get", "watch", "list"] - - apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels"] + - apiGroups: ["inference.networking.k8s.io"] + resources: ["inferencepools"] verbs: ["get", "watch", "list"] - apiGroups: [""] resources: ["pods"] verbs: ["get", "watch", "list"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-read-binding + namespace: default +subjects: + - kind: ServiceAccount + name: mistral-epp + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: pod-read +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-reviewer +rules: - apiGroups: - authentication.k8s.io resources: @@ -263,15 +232,15 @@ rules: kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: pod-read-binding + name: auth-reviewer-binding subjects: - kind: ServiceAccount - name: default + name: mistral-epp 
namespace: default roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: pod-read + name: auth-reviewer --- apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend diff --git a/examples/inference-pool/config.yaml b/examples/inference-pool/config.yaml index 261c0fc817..3827de683e 100644 --- a/examples/inference-pool/config.yaml +++ b/examples/inference-pool/config.yaml @@ -42,9 +42,9 @@ data: enableBackend: true extensionManager: backendResources: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool - version: v1alpha2 + version: v1 hooks: xdsTranslator: translation: diff --git a/examples/inference-pool/httproute.yaml b/examples/inference-pool/httproute.yaml index ee4d3469f6..e68d873865 100644 --- a/examples/inference-pool/httproute.yaml +++ b/examples/inference-pool/httproute.yaml @@ -35,7 +35,7 @@ spec: namespace: default rules: - backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct namespace: default diff --git a/examples/inference-pool/with-annotations.yaml b/examples/inference-pool/with-annotations.yaml index fd9488dcc9..0698a1f7bc 100644 --- a/examples/inference-pool/with-annotations.yaml +++ b/examples/inference-pool/with-annotations.yaml @@ -53,7 +53,7 @@ spec: initialDelaySeconds: 1 periodSeconds: 1 --- -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: mistral-with-annotations @@ -68,22 +68,26 @@ metadata: # This corresponds to the AllowModeOverride field in Envoy's ExternalProcessor aigateway.envoyproxy.io/allow-mode-override: "true" spec: - targetPortNumber: 8080 + targetPorts: + - number: 8080 selector: - app: mistral-upstream - extensionRef: + matchLabels: + app: mistral-upstream + endpointPickerRef: name: mistral-epp-with-annotations + port: + number: 9002 --- apiVersion: 
inference.networking.x-k8s.io/v1alpha2 -kind: InferenceModel +kind: InferenceObjective metadata: name: mistral-with-annotations namespace: default spec: - modelName: mistral:latest - criticality: Critical + priority: 10 poolRef: - # Bind the InferenceModel to the InferencePool. + # Bind the InferenceObjective to the InferencePool. + group: inference.networking.k8s.io name: mistral-with-annotations --- apiVersion: v1 diff --git a/go.mod b/go.mod index e3687a0625..171847c2d5 100644 --- a/go.mod +++ b/go.mod @@ -67,7 +67,7 @@ require ( k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d sigs.k8s.io/controller-runtime v0.22.1 sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c - sigs.k8s.io/gateway-api-inference-extension v0.5.1 + sigs.k8s.io/gateway-api-inference-extension v1.0.1 sigs.k8s.io/yaml v1.6.0 ) diff --git a/go.sum b/go.sum index da00b44242..1ac441db35 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,9 @@ github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJ github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI= github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= +github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= @@ -344,10 +347,10 @@ github.com/ohler55/ojg v1.26.10/go.mod 
h1:/Y5dGWkekv9ocnUixuETqiL58f+5pAsUfg5P8e github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= -github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= -github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= -github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= -github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= +github.com/onsi/ginkgo/v2 v2.24.0 h1:obZz8LAnHicNdbBqvG3ytAFx8fgza+i1IDpBVcHT2YE= +github.com/onsi/ginkgo/v2 v2.24.0/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk= +github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= +github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y= github.com/openai/openai-go/v2 v2.7.0 h1:/8MSFCXcasin7AyuWQ2au6FraXL71gzAs+VfbMv+J3k= @@ -661,8 +664,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c h1:GS4VnGRV90GEUjrgQ2GT5ii6yzWj3KtgUg+sVMdhs5c= sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= -sigs.k8s.io/gateway-api-inference-extension v0.5.1 h1:OMpt4gKlPWkD+h5kHcZZVh4926kix2DSBPI7X5ntuCA= -sigs.k8s.io/gateway-api-inference-extension v0.5.1/go.mod h1:lki0jx1qysZSZT4Ai2BxuAcpx6G8g5oBgOGuuJzjy/k= +sigs.k8s.io/gateway-api-inference-extension v1.0.1 h1:n/zyxk/1RCT1nNoCdKiZsN7XTz9mTk3Cu1fWWbtZMBw= +sigs.k8s.io/gateway-api-inference-extension 
v1.0.1/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/kubectl-validate v0.0.5-0.20250915070809-d2f2d68fba09 h1:JQbPOwLjSztom+aSDQIi6UZq8V0Gbv7BjAlYQSgycCI= diff --git a/internal/controller/ai_gateway_route_test.go b/internal/controller/ai_gateway_route_test.go index ca06fd6161..5ec9a25d8c 100644 --- a/internal/controller/ai_gateway_route_test.go +++ b/internal/controller/ai_gateway_route_test.go @@ -437,7 +437,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), Weight: ptr.To(int32(100)), }, @@ -460,7 +460,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) { // Check the first rule (our InferencePool rule). 
backendRef := httpRoute.Spec.Rules[0].BackendRefs[0] - require.Equal(t, "inference.networking.x-k8s.io", string(*backendRef.Group)) + require.Equal(t, "inference.networking.k8s.io", string(*backendRef.Group)) require.Equal(t, "InferencePool", string(*backendRef.Kind)) require.Equal(t, "test-inference-pool", string(backendRef.Name)) require.Equal(t, "test-ns", string(*backendRef.Namespace)) diff --git a/internal/controller/controller.go b/internal/controller/controller.go index b0abd2a8d3..1a4c9e2bb7 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -34,7 +34,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" gwapiv1a3 "sigs.k8s.io/gateway-api/apis/v1alpha3" gwapiv1b1 "sigs.k8s.io/gateway-api/apis/v1beta1" @@ -50,7 +50,7 @@ func init() { utilruntime.Must(gwapiv1.Install(Scheme)) utilruntime.Must(gwapiv1a3.Install(Scheme)) utilruntime.Must(gwapiv1b1.Install(Scheme)) - utilruntime.Must(gwaiev1a2.Install(Scheme)) + utilruntime.Must(gwaiev1.Install(Scheme)) } // Scheme contains the necessary schemes for the AI Gateway. @@ -161,7 +161,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con if err != nil { return fmt.Errorf("failed to create CRD client for inference extension: %w", err) } - const inferencePoolCRD = "inferencepools.inference.networking.x-k8s.io" + const inferencePoolCRD = "inferencepools.inference.networking.k8s.io" if _, crdErr := crdClient.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, inferencePoolCRD, metav1.GetOptions{}); crdErr != nil { if apierrors.IsNotFound(crdErr) { logger.Info("InferencePool CRD not found, skipping InferencePool controller. 
" + @@ -173,7 +173,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con // CRD exists, create the controller. inferencePoolC := NewInferencePoolController(c, kubernetes.NewForConfigOrDie(config), logger. WithName("inference-pool")) - if err = TypedControllerBuilderForCRD(mgr, &gwaiev1a2.InferencePool{}). + if err = TypedControllerBuilderForCRD(mgr, &gwaiev1.InferencePool{}). Watches(&gwapiv1.Gateway{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.gatewayEventHandler)). Watches(&aigv1a1.AIGatewayRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.aiGatewayRouteEventHandler)). Watches(&gwapiv1.HTTPRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.httpRouteEventHandler)). diff --git a/internal/controller/inference_pool.go b/internal/controller/inference_pool.go index 47381b5962..825f924120 100644 --- a/internal/controller/inference_pool.go +++ b/internal/controller/inference_pool.go @@ -17,13 +17,13 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1" ) -// InferencePoolController implements [reconcile.TypedReconciler] for [gwaiev1a2.InferencePool]. +// InferencePoolController implements [reconcile.TypedReconciler] for [gwaiev1.InferencePool]. // // This handles the InferencePool resource and updates its status based on associated Gateways. // @@ -34,7 +34,7 @@ type InferencePoolController struct { logger logr.Logger } -// NewInferencePoolController creates a new reconcile.TypedReconciler for gwaiev1a2.InferencePool. +// NewInferencePoolController creates a new reconcile.TypedReconciler for gwaiev1.InferencePool. 
func NewInferencePoolController( client client.Client, kube kubernetes.Interface, logger logr.Logger, ) *InferencePoolController { @@ -45,9 +45,9 @@ func NewInferencePoolController( } } -// Reconcile implements the [reconcile.TypedReconciler] for [gwaiev1a2.InferencePool]. +// Reconcile implements the [reconcile.TypedReconciler] for [gwaiev1.InferencePool]. func (c *InferencePoolController) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { - var inferencePool gwaiev1a2.InferencePool + var inferencePool gwaiev1.InferencePool if err := c.client.Get(ctx, req.NamespacedName, &inferencePool); err != nil { if client.IgnoreNotFound(err) == nil { c.logger.Info("Deleting InferencePool", @@ -69,7 +69,7 @@ func (c *InferencePoolController) Reconcile(ctx context.Context, req reconcile.R // syncInferencePool is the main logic for reconciling the InferencePool resource. // This is decoupled from the Reconcile method to centralize the error handling and status updates. -func (c *InferencePoolController) syncInferencePool(ctx context.Context, inferencePool *gwaiev1a2.InferencePool) error { +func (c *InferencePoolController) syncInferencePool(ctx context.Context, inferencePool *gwaiev1.InferencePool) error { // Check if the ExtensionReference service exists. if err := c.validateExtensionReference(ctx, inferencePool); err != nil { return err @@ -97,7 +97,7 @@ func (c *InferencePoolController) routeReferencesInferencePool(route *aigv1a1.AI } // getReferencedGateways returns all Gateways that reference the given InferencePool. -func (c *InferencePoolController) getReferencedGateways(ctx context.Context, inferencePool *gwaiev1a2.InferencePool) (map[string]*gwapiv1.Gateway, error) { +func (c *InferencePoolController) getReferencedGateways(ctx context.Context, inferencePool *gwaiev1.InferencePool) (map[string]*gwapiv1.Gateway, error) { // Find all Gateways across all namespaces. 
var gateways gwapiv1.GatewayList if err := c.client.List(ctx, &gateways); err != nil { @@ -119,14 +119,9 @@ func (c *InferencePoolController) getReferencedGateways(ctx context.Context, inf } // validateExtensionReference checks if the ExtensionReference service exists. -func (c *InferencePoolController) validateExtensionReference(ctx context.Context, inferencePool *gwaiev1a2.InferencePool) error { - // Check if ExtensionRef is specified. - if inferencePool.Spec.ExtensionRef == nil { - return nil // No extension reference to validate. - } - +func (c *InferencePoolController) validateExtensionReference(ctx context.Context, inferencePool *gwaiev1.InferencePool) error { // Get the service name from ExtensionReference. - serviceName := inferencePool.Spec.ExtensionRef.Name + serviceName := inferencePool.Spec.EndpointPickerRef.Name if serviceName == "" { return fmt.Errorf("ExtensionReference name is empty") } @@ -218,7 +213,7 @@ func (c *InferencePoolController) routeReferencesGateway(parentRefs []gwapiv1.Pa func (c *InferencePoolController) httpRouteReferencesInferencePool(route *gwapiv1.HTTPRoute, inferencePoolName string) bool { for _, rule := range route.Spec.Rules { for _, backendRef := range rule.BackendRefs { - if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.x-k8s.io" && + if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.k8s.io" && backendRef.Kind != nil && string(*backendRef.Kind) == "InferencePool" && string(backendRef.Name) == inferencePoolName { return true @@ -229,7 +224,7 @@ func (c *InferencePoolController) httpRouteReferencesInferencePool(route *gwapiv } // updateInferencePoolStatus updates the status of the InferencePool. 
-func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context, inferencePool *gwaiev1a2.InferencePool, conditionType string, message string) { +func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context, inferencePool *gwaiev1.InferencePool, conditionType string, message string) { // Check if this is an ExtensionReference validation error. isExtensionRefError := conditionType == "NotAccepted" && (strings.Contains(message, "ExtensionReference service") && strings.Contains(message, "not found")) @@ -241,17 +236,17 @@ func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context, } // Build Parents status. - var parents []gwaiev1a2.PoolStatus + var parents []gwaiev1.ParentStatus for _, gw := range referencedGateways { // Set Gateway group and kind according to Gateway API defaults. gatewayGroup := "gateway.networking.k8s.io" gatewayKind := "Gateway" - parentRef := gwaiev1a2.ParentGatewayReference{ - Group: (*gwaiev1a2.Group)(&gatewayGroup), - Kind: (*gwaiev1a2.Kind)(&gatewayKind), - Name: gwaiev1a2.ObjectName(gw.Name), - Namespace: (*gwaiev1a2.Namespace)(&gw.Namespace), + parentRef := gwaiev1.ParentReference{ + Group: (*gwaiev1.Group)(&gatewayGroup), + Kind: gwaiev1.Kind(gatewayKind), + Name: gwaiev1.ObjectName(gw.Name), + Namespace: gwaiev1.Namespace(gw.Namespace), } var conditions []metav1.Condition @@ -270,8 +265,8 @@ func (c *InferencePoolController) updateInferencePoolStatus(ctx context.Context, conditions = append(conditions, resolvedRefsCondition) } - parents = append(parents, gwaiev1a2.PoolStatus{ - GatewayRef: parentRef, + parents = append(parents, gwaiev1.ParentStatus{ + ParentRef: parentRef, Conditions: conditions, }) } @@ -313,7 +308,7 @@ func (c *InferencePoolController) gatewayEventHandler(ctx context.Context, obj c } // Find all InferencePools in the same namespace that might be affected by this Gateway. 
- var inferencePools gwaiev1a2.InferencePoolList + var inferencePools gwaiev1.InferencePoolList if err := c.client.List(ctx, &inferencePools, client.InNamespace(gateway.Namespace)); err != nil { c.logger.Error(err, "failed to list InferencePools for Gateway event", "gateway", gateway.Name) return nil @@ -371,7 +366,7 @@ func (c *InferencePoolController) httpRouteEventHandler(_ context.Context, obj c var requests []reconcile.Request for _, rule := range route.Spec.Rules { for _, backendRef := range rule.BackendRefs { - if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.x-k8s.io" && + if backendRef.Group != nil && string(*backendRef.Group) == "inference.networking.k8s.io" && backendRef.Kind != nil && string(*backendRef.Kind) == "InferencePool" { requests = append(requests, reconcile.Request{ NamespacedName: client.ObjectKey{ diff --git a/internal/controller/inference_pool_test.go b/internal/controller/inference_pool_test.go index 1cf03f4bfb..161056ff91 100644 --- a/internal/controller/inference_pool_test.go +++ b/internal/controller/inference_pool_test.go @@ -17,7 +17,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1" @@ -28,7 +28,7 @@ func requireNewFakeClientWithIndexesAndInferencePool(t *testing.T) client.Client WithStatusSubresource(&aigv1a1.AIGatewayRoute{}). WithStatusSubresource(&aigv1a1.AIServiceBackend{}). WithStatusSubresource(&aigv1a1.BackendSecurityPolicy{}). 
- WithStatusSubresource(&gwaiev1a2.InferencePool{}) + WithStatusSubresource(&gwaiev1.InferencePool{}) err := ApplyIndexing(t.Context(), func(_ context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error { builder = builder.WithIndex(obj, field, extractValue) return nil @@ -42,22 +42,18 @@ func TestInferencePoolController_ExtensionReferenceValidation(t *testing.T) { c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log) // Create an InferencePool with ExtensionReference pointing to a non-existent service. - inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "non-existent-service", - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "non-existent-service", }, }, } @@ -75,7 +71,7 @@ func TestInferencePoolController_ExtensionReferenceValidation(t *testing.T) { require.Equal(t, ctrl.Result{}, result) // Check that the InferencePool status was updated with ResolvedRefs condition. - var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool", Namespace: "default", @@ -107,22 +103,18 @@ func TestInferencePoolController_ExtensionReferenceValidationSuccess(t *testing. 
require.NoError(t, fakeClient.Create(context.Background(), service)) // Create an InferencePool with ExtensionReference pointing to the existing service. - inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "existing-service", - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "existing-service", }, }, } @@ -139,7 +131,7 @@ func TestInferencePoolController_ExtensionReferenceValidationSuccess(t *testing. require.Equal(t, ctrl.Result{}, result) // Check that the InferencePool status was updated successfully. - var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool", Namespace: "default", @@ -198,7 +190,7 @@ func TestInferencePoolController_Reconcile(t *testing.T) { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), Weight: ptr.To(int32(100)), }, @@ -210,22 +202,18 @@ func TestInferencePoolController_Reconcile(t *testing.T) { require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute)) // Create an InferencePool. 
- inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "test-epp", }, }, } @@ -242,7 +230,7 @@ func TestInferencePoolController_Reconcile(t *testing.T) { require.Equal(t, ctrl.Result{}, result) // Check that the InferencePool status was updated. - var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool", Namespace: "default", @@ -252,10 +240,10 @@ func TestInferencePoolController_Reconcile(t *testing.T) { require.Len(t, updatedInferencePool.Status.Parents, 1) parent := updatedInferencePool.Status.Parents[0] - require.Equal(t, "gateway.networking.k8s.io", string(*parent.GatewayRef.Group)) - require.Equal(t, "Gateway", string(*parent.GatewayRef.Kind)) - require.Equal(t, "test-gateway", string(parent.GatewayRef.Name)) - require.Equal(t, "default", string(*parent.GatewayRef.Namespace)) + require.Equal(t, "gateway.networking.k8s.io", string(*parent.ParentRef.Group)) + require.Equal(t, "Gateway", string(parent.ParentRef.Kind)) + require.Equal(t, "test-gateway", string(parent.ParentRef.Name)) + require.Equal(t, "default", string(parent.ParentRef.Namespace)) // Verify that the conditions are set correctly. 
require.Len(t, parent.Conditions, 2, "Should have both Accepted and ResolvedRefs conditions") @@ -303,22 +291,18 @@ func TestInferencePoolController_NoReferencingGateways(t *testing.T) { require.NoError(t, fakeClient.Create(context.Background(), service)) // Create an InferencePool without any referencing AIGatewayRoutes. - inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "test-epp", }, }, } @@ -335,7 +319,7 @@ func TestInferencePoolController_NoReferencingGateways(t *testing.T) { require.Equal(t, ctrl.Result{}, result) // Check that the InferencePool status was updated. 
- var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool", Namespace: "default", @@ -410,7 +394,7 @@ func TestInferencePoolController_HTTPRouteReferencesInferencePool(t *testing.T) { BackendRef: gwapiv1.BackendRef{ BackendObjectReference: gwapiv1.BackendObjectReference{ - Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")), + Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")), Kind: ptr.To(gwapiv1.Kind("InferencePool")), Name: "test-inference-pool", }, @@ -529,7 +513,7 @@ func TestInferencePoolController_GatewayReferencesInferencePool(t *testing.T) { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -557,16 +541,16 @@ func TestInferencePoolController_gatewayEventHandler(t *testing.T) { c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log) // Create an InferencePool. 
- inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, }, } require.NoError(t, fakeClient.Create(context.Background(), inferencePool)) @@ -588,7 +572,7 @@ func TestInferencePoolController_gatewayEventHandler(t *testing.T) { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -632,7 +616,7 @@ func TestInferencePoolController_aiGatewayRouteEventHandler(t *testing.T) { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -666,7 +650,7 @@ func TestInferencePoolController_httpRouteEventHandler(t *testing.T) { { BackendRef: gwapiv1.BackendRef{ BackendObjectReference: gwapiv1.BackendObjectReference{ - Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")), + Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")), Kind: ptr.To(gwapiv1.Kind("InferencePool")), Name: "test-inference-pool", }, @@ -700,48 +684,19 @@ func TestInferencePoolController_EdgeCases(t *testing.T) { require.NoError(t, err, "Should not error when InferencePool doesn't exist") require.Equal(t, ctrl.Result{}, result) - // Test InferencePool without ExtensionRef. 
- inferencePoolNoExtRef := &gwaiev1a2.InferencePool{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-inference-pool-no-ext", - Namespace: "default", - }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ - "app": "test-app", - }, - TargetPortNumber: 8080, - // No ExtensionRef. - }, - } - require.NoError(t, fakeClient.Create(context.Background(), inferencePoolNoExtRef)) - - result, err = c.Reconcile(context.Background(), ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-inference-pool-no-ext", - Namespace: "default", - }, - }) - require.NoError(t, err, "Should not error when InferencePool has no ExtensionRef") - require.Equal(t, ctrl.Result{}, result) - // Test InferencePool with empty ExtensionRef name. - inferencePoolEmptyExtRef := &gwaiev1a2.InferencePool{ + inferencePoolEmptyExtRef := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-empty-ext", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "", // Empty name. - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "", // Empty name. 
}, }, } @@ -792,7 +747,7 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -819,22 +774,18 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) { require.NoError(t, fakeClient.Create(context.Background(), service)) // Create an InferencePool. - inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "test-epp", }, }, } @@ -851,7 +802,7 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) { require.Equal(t, ctrl.Result{}, result) // Check that the InferencePool status was updated with the cross-namespace Gateway. 
- var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool", Namespace: "default", @@ -861,10 +812,10 @@ func TestInferencePoolController_CrossNamespaceReferences(t *testing.T) { require.Len(t, updatedInferencePool.Status.Parents, 1) parent := updatedInferencePool.Status.Parents[0] - require.Equal(t, "gateway.networking.k8s.io", string(*parent.GatewayRef.Group)) - require.Equal(t, "Gateway", string(*parent.GatewayRef.Kind)) - require.Equal(t, "test-gateway", string(parent.GatewayRef.Name)) - require.Equal(t, "gateway-namespace", string(*parent.GatewayRef.Namespace)) + require.Equal(t, "gateway.networking.k8s.io", string(*parent.ParentRef.Group)) + require.Equal(t, "Gateway", string(parent.ParentRef.Kind)) + require.Equal(t, "test-gateway", string(parent.ParentRef.Name)) + require.Equal(t, "gateway-namespace", string(parent.ParentRef.Namespace)) } func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) { @@ -900,7 +851,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -911,17 +862,17 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) { require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute)) // Create an InferencePool. - inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", Generation: 5, // Set a specific generation for testing. 
}, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, }, } require.NoError(t, fakeClient.Create(context.Background(), inferencePool)) @@ -930,7 +881,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) { c.updateInferencePoolStatus(context.Background(), inferencePool, "NotAccepted", "test error message") // Check that the status was updated. - var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool", Namespace: "default", @@ -940,7 +891,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus(t *testing.T) { require.Len(t, updatedInferencePool.Status.Parents, 1) parent := updatedInferencePool.Status.Parents[0] - require.Equal(t, "test-gateway", string(parent.GatewayRef.Name)) + require.Equal(t, "test-gateway", string(parent.ParentRef.Name)) require.Len(t, parent.Conditions, 2, "Should have both Accepted and ResolvedRefs conditions") // Find the conditions. @@ -970,16 +921,16 @@ func TestInferencePoolController_GetReferencedGateways_ErrorHandling(t *testing. c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log) // Create an InferencePool. 
- inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, }, } @@ -1000,7 +951,7 @@ func TestInferencePoolController_GetReferencedGateways_ErrorHandling(t *testing. BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -1052,7 +1003,7 @@ func TestInferencePoolController_GatewayReferencesInferencePool_HTTPRoute(t *tes { BackendRef: gwapiv1.BackendRef{ BackendObjectReference: gwapiv1.BackendObjectReference{ - Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")), + Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")), Kind: ptr.To(gwapiv1.Kind("InferencePool")), Name: "test-inference-pool", }, @@ -1081,45 +1032,6 @@ func TestInferencePoolController_GatewayReferencesInferencePool_HTTPRoute(t *tes func TestInferencePoolController_ValidateExtensionReference_EdgeCases(t *testing.T) { fakeClient := requireNewFakeClientWithIndexesAndInferencePool(t) c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log) - - // Test with nil ExtensionRef. - inferencePoolNilExt := &gwaiev1a2.InferencePool{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-inference-pool-nil-ext", - Namespace: "default", - }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ - "app": "test-app", - }, - TargetPortNumber: 8080, - // No EndpointPickerConfig. 
- }, - } - - err := c.validateExtensionReference(context.Background(), inferencePoolNilExt) - require.NoError(t, err, "Should not error when ExtensionRef is nil") - - // Test with ExtensionRef but nil ExtensionRef field. - inferencePoolNilExtRef := &gwaiev1a2.InferencePool{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-inference-pool-nil-extref", - Namespace: "default", - }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ - "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - // ExtensionRef is nil. - }, - }, - } - - err = c.validateExtensionReference(context.Background(), inferencePoolNilExtRef) - require.NoError(t, err, "Should not error when ExtensionRef field is nil") - // Test with service in different namespace (should fail). serviceOtherNS := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ @@ -1136,27 +1048,23 @@ func TestInferencePoolController_ValidateExtensionReference_EdgeCases(t *testing } require.NoError(t, fakeClient.Create(context.Background(), serviceOtherNS)) - inferencePoolOtherNS := &gwaiev1a2.InferencePool{ + inferencePoolOtherNS := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-other-ns", Namespace: "default", // InferencePool in default namespace. }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "service-other-ns", // Service in other-namespace. - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "service-other-ns", // Refers to service in other-namespace. 
}, }, } - err = c.validateExtensionReference(context.Background(), inferencePoolOtherNS) + err := c.validateExtensionReference(context.Background(), inferencePoolOtherNS) require.Error(t, err, "Should error when ExtensionReference service is in different namespace") require.Contains(t, err.Error(), "ExtensionReference service service-other-ns not found in namespace default") } @@ -1166,22 +1074,18 @@ func TestInferencePoolController_Reconcile_ErrorHandling(t *testing.T) { c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log) // Test reconcile with InferencePool that has empty ExtensionRef name. - inferencePoolEmptyName := &gwaiev1a2.InferencePool{ + inferencePoolEmptyName := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-empty-name", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "", // Empty name. - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "", // Empty name. }, }, } @@ -1199,22 +1103,18 @@ func TestInferencePoolController_Reconcile_ErrorHandling(t *testing.T) { require.Equal(t, ctrl.Result{}, result) // Test reconcile with InferencePool that has non-existent ExtensionRef service. 
- inferencePoolNonExistentService := &gwaiev1a2.InferencePool{ + inferencePoolNonExistentService := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-non-existent", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "non-existent-service", - }, - }, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "non-existent-service", }, }, } @@ -1237,16 +1137,16 @@ func TestInferencePoolController_SyncInferencePool_EdgeCases(t *testing.T) { c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log) // Test syncInferencePool with InferencePool that has no referenced gateways. 
- inferencePoolNoGateways := &gwaiev1a2.InferencePool{ + inferencePoolNoGateways := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-no-gateways", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, }, } require.NoError(t, fakeClient.Create(context.Background(), inferencePoolNoGateways)) @@ -1266,13 +1166,8 @@ func TestInferencePoolController_SyncInferencePool_EdgeCases(t *testing.T) { }, } require.NoError(t, fakeClient.Create(context.Background(), service)) - - inferencePoolNoGateways.Spec.EndpointPickerConfig = gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp-no-gateways", - }, - }, + inferencePoolNoGateways.Spec.EndpointPickerRef = gwaiev1.EndpointPickerRef{ + Name: "test-epp-no-gateways", } require.NoError(t, fakeClient.Update(context.Background(), inferencePoolNoGateways)) @@ -1287,7 +1182,7 @@ func TestInferencePoolController_SyncInferencePool_EdgeCases(t *testing.T) { require.Equal(t, ctrl.Result{}, result) // Check that the InferencePool status is empty (no parents). - var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool-no-gateways", Namespace: "default", @@ -1301,16 +1196,16 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi c := NewInferencePoolController(fakeClient, kubefake.NewSimpleClientset(), ctrl.Log) // Create an InferencePool. 
- inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-complex", Namespace: "default", }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, }, } @@ -1354,7 +1249,7 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool-complex", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -1381,7 +1276,7 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool-complex", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -1411,7 +1306,7 @@ func TestInferencePoolController_GetReferencedGateways_ComplexScenarios(t *testi { BackendRef: gwapiv1.BackendRef{ BackendObjectReference: gwapiv1.BackendObjectReference{ - Group: ptr.To(gwapiv1.Group("inference.networking.x-k8s.io")), + Group: ptr.To(gwapiv1.Group("inference.networking.k8s.io")), Kind: ptr.To(gwapiv1.Kind("InferencePool")), Name: "test-inference-pool-complex", }, @@ -1484,7 +1379,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t *t BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool-multi", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -1510,7 +1405,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t 
*t BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool-multi", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -1521,17 +1416,17 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t *t require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute2)) // Create an InferencePool. - inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-multi", Namespace: "default", Generation: 10, // Set a specific generation for testing. }, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, }, } require.NoError(t, fakeClient.Create(context.Background(), inferencePool)) @@ -1540,7 +1435,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_MultipleGateways(t *t c.updateInferencePoolStatus(context.Background(), inferencePool, "Accepted", "all references resolved") // Check that the status was updated for both gateways. 
- var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool-multi", Namespace: "default", @@ -1613,7 +1508,7 @@ func TestInferencePoolController_GatewayReferencesInferencePool_NoRoutes(t *test BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -1688,7 +1583,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t * BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ { Name: "test-inference-pool-ext-error", - Group: ptr.To("inference.networking.x-k8s.io"), + Group: ptr.To("inference.networking.k8s.io"), Kind: ptr.To("InferencePool"), }, }, @@ -1699,17 +1594,17 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t * require.NoError(t, fakeClient.Create(context.Background(), aiGatewayRoute)) // Create an InferencePool. - inferencePool := &gwaiev1a2.InferencePool{ + inferencePool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-inference-pool-ext-error", Namespace: "default", Generation: 15, // Set a specific generation for testing. 
}, - Spec: gwaiev1a2.InferencePoolSpec{ - Selector: map[gwaiev1a2.LabelKey]gwaiev1a2.LabelValue{ + Spec: gwaiev1.InferencePoolSpec{ + Selector: gwaiev1.LabelSelector{MatchLabels: map[gwaiev1.LabelKey]gwaiev1.LabelValue{ "app": "test-app", - }, - TargetPortNumber: 8080, + }}, + TargetPorts: []gwaiev1.Port{{Number: 8080}}, }, } require.NoError(t, fakeClient.Create(context.Background(), inferencePool)) @@ -1719,7 +1614,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t * c.updateInferencePoolStatus(context.Background(), inferencePool, "NotAccepted", extRefErrorMessage) // Check that the status was updated with ExtensionReference error. - var updatedInferencePool gwaiev1a2.InferencePool + var updatedInferencePool gwaiev1.InferencePool require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{ Name: "test-inference-pool-ext-error", Namespace: "default", @@ -1729,7 +1624,7 @@ func TestInferencePoolController_UpdateInferencePoolStatus_ExtensionRefError(t * require.Len(t, updatedInferencePool.Status.Parents, 1) parent := updatedInferencePool.Status.Parents[0] - require.Equal(t, "test-gateway-ext-error", string(parent.GatewayRef.Name)) + require.Equal(t, "test-gateway-ext-error", string(parent.ParentRef.Name)) require.Len(t, parent.Conditions, 2, "Should have both Accepted and ResolvedRefs conditions") // Find the conditions. 
diff --git a/internal/extensionserver/extensionserver_test.go b/internal/extensionserver/extensionserver_test.go index 2a83efb36c..48fe405a3e 100644 --- a/internal/extensionserver/extensionserver_test.go +++ b/internal/extensionserver/extensionserver_test.go @@ -35,7 +35,7 @@ import ( "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1" "github.com/envoyproxy/ai-gateway/internal/controller" @@ -186,7 +186,7 @@ func Test_maybeModifyCluster(t *testing.T) { func createInferencePoolExtensionResource(name, namespace string) *egextension.ExtensionResource { unstructuredObj := &unstructured.Unstructured{ Object: map[string]any{ - "apiVersion": "inference.networking.x-k8s.io/v1alpha2", + "apiVersion": "inference.networking.k8s.io/v1", "kind": "InferencePool", "metadata": map[string]any{ "name": name, @@ -617,20 +617,16 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { s := New(newFakeClient(), logr.Discard(), udsPath, false) // Helper function to create an InferencePool. 
- createInferencePool := func(name, namespace string) *gwaiev1a2.InferencePool { - return &gwaiev1a2.InferencePool{ + createInferencePool := func(name, namespace string) *gwaiev1.InferencePool { + return &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, }, - Spec: gwaiev1a2.InferencePoolSpec{ - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "test-epp", }, }, } @@ -659,7 +655,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { listener := &listenerv3.Listener{ Name: "test-listener", } - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchListenerWithInferencePoolFilters(listener, pools) // Should handle gracefully when no filter chains exist. 
@@ -679,7 +675,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { }, }, } - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} server.patchListenerWithInferencePoolFilters(listener, pools) require.Contains(t, buf.String(), "failed to find an HCM in the current chain") @@ -692,7 +688,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { } listener := createListenerWithHCM("test-listener", existingFilters) - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchListenerWithInferencePoolFilters(listener, pools) @@ -709,7 +705,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { } listener := createListenerWithHCM("test-listener", existingFilters) - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchListenerWithInferencePoolFilters(listener, pools) @@ -728,7 +724,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { } listener := createListenerWithHCM("test-listener", existingFilters) - pools := []*gwaiev1a2.InferencePool{ + pools := []*gwaiev1.InferencePool{ createInferencePool("pool1", "test-ns"), createInferencePool("pool2", "test-ns"), } @@ -774,7 +770,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { }, } - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchListenerWithInferencePoolFilters(listener, pools) @@ -802,7 +798,7 @@ func TestPatchListenerWithInferencePoolFilters(t *testing.T) { listener := createListenerWithHCM("test-listener", []*httpconnectionmanagerv3.HttpFilter{ {Name: "envoy.filters.http.router"}, }) - pools := 
[]*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} server.patchListenerWithInferencePoolFilters(listener, pools) // This test mainly ensures the error handling path is covered. @@ -815,27 +811,23 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { s := New(newFakeClient(), logr.Discard(), udsPath, false) // Helper function to create an InferencePool. - createInferencePool := func(name, namespace string) *gwaiev1a2.InferencePool { - return &gwaiev1a2.InferencePool{ + createInferencePool := func(name, namespace string) *gwaiev1.InferencePool { + return &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, }, - Spec: gwaiev1a2.InferencePoolSpec{ - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "test-epp", }, }, } } // Helper function to create a route with InferencePool metadata. - createRouteWithInferencePool := func(routeName string, pool *gwaiev1a2.InferencePool) *routev3.Route { + createRouteWithInferencePool := func(routeName string, pool *gwaiev1.InferencePool) *routev3.Route { metadata := &corev3.Metadata{ FilterMetadata: map[string]*structpb.Struct{ internalapi.InternalEndpointMetadataNamespace: { @@ -859,7 +851,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { Name: "test-vh", Routes: []*routev3.Route{}, } - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchVirtualHostWithInferencePool(vh, pools) // Should handle gracefully when no routes exist. 
@@ -873,7 +865,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { Name: "test-vh", Routes: []*routev3.Route{normalRoute}, } - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchVirtualHostWithInferencePool(vh, pools) @@ -891,7 +883,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { Name: "test-vh", Routes: []*routev3.Route{inferenceRoute}, } - pools := []*gwaiev1a2.InferencePool{pool} + pools := []*gwaiev1.InferencePool{pool} s.patchVirtualHostWithInferencePool(vh, pools) @@ -914,7 +906,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { Name: "test-vh", Routes: []*routev3.Route{inferenceRoute}, } - pools := []*gwaiev1a2.InferencePool{pool1, pool2} + pools := []*gwaiev1.InferencePool{pool1, pool2} s.patchVirtualHostWithInferencePool(vh, pools) @@ -946,7 +938,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { Name: "test-vh", Routes: []*routev3.Route{directResponseRoute}, } - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchVirtualHostWithInferencePool(vh, pools) @@ -972,7 +964,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { Name: "test-vh", Routes: []*routev3.Route{directResponseRoute}, } - pools := []*gwaiev1a2.InferencePool{createInferencePool("test-pool", "test-ns")} + pools := []*gwaiev1.InferencePool{createInferencePool("test-pool", "test-ns")} s.patchVirtualHostWithInferencePool(vh, pools) @@ -994,7 +986,7 @@ func TestPatchVirtualHostWithInferencePool(t *testing.T) { Name: "test-vh", Routes: []*routev3.Route{normalRoute, inferenceRoute1, inferenceRoute2}, } - pools := []*gwaiev1a2.InferencePool{pool1, pool2} + pools := []*gwaiev1.InferencePool{pool1, pool2} s.patchVirtualHostWithInferencePool(vh, pools) @@ -1209,19 +1201,15 @@ func 
TestConstructInferencePoolsFrom(t *testing.T) { // TestInferencePoolHelperFunctions tests various helper functions for InferencePool. func TestInferencePoolHelperFunctions(t *testing.T) { // Create a test InferencePool. - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", }, - Spec: gwaiev1a2.InferencePoolSpec{ - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: "test-epp", }, }, } @@ -1253,8 +1241,8 @@ func TestInferencePoolHelperFunctions(t *testing.T) { t.Run("portForInferencePool custom", func(t *testing.T) { customPool := pool.DeepCopy() - customPort := gwaiev1a2.PortNumber(8888) - customPool.Spec.ExtensionRef.PortNumber = &customPort + customPort := gwaiev1.PortNumber(8888) + customPool.Spec.EndpointPickerRef.Port = &gwaiev1.Port{Number: customPort} port := portForInferencePool(customPool) require.Equal(t, uint32(8888), port) }) @@ -1264,7 +1252,7 @@ func TestInferencePoolHelperFunctions(t *testing.T) { func TestInferencePoolAnnotationHelpers(t *testing.T) { t.Run("getProcessingBodyModeFromAnnotations", func(t *testing.T) { t.Run("no annotations", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1275,7 +1263,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to duplex", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1289,7 +1277,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to 
buffered", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1303,7 +1291,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to invalid value", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1319,7 +1307,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { t.Run("getAllowModeOverrideFromAnnotations", func(t *testing.T) { t.Run("no annotations", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1330,7 +1318,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to true", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1344,7 +1332,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to false", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1358,7 +1346,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to invalid value", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1374,7 +1362,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { t.Run("getProcessingBodyModeStringFromAnnotations", func(t *testing.T) { t.Run("no annotations", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1385,7 +1373,7 @@ func 
TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to duplex", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1399,7 +1387,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to buffered", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1413,7 +1401,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to invalid value", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1429,7 +1417,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { t.Run("getAllowModeOverrideStringFromAnnotations", func(t *testing.T) { t.Run("no annotations", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1440,7 +1428,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to true", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1454,7 +1442,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to false", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1468,7 +1456,7 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { }) t.Run("annotation set to invalid value", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ 
-1486,19 +1474,13 @@ func TestInferencePoolAnnotationHelpers(t *testing.T) { // TestBuildHTTPFilterForInferencePool tests the buildHTTPFilterForInferencePool function with annotations. func TestBuildHTTPFilterForInferencePool(t *testing.T) { t.Run("default configuration", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", }, - Spec: gwaiev1a2.InferencePoolSpec{ - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"}, }, } @@ -1512,7 +1494,7 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) { }) t.Run("with buffered mode annotation", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1520,14 +1502,8 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) { "aigateway.envoyproxy.io/processing-body-mode": "buffered", }, }, - Spec: gwaiev1a2.InferencePoolSpec{ - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"}, }, } @@ -1541,7 +1517,7 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) { }) t.Run("with allow mode override annotation", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1549,14 +1525,8 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) { "aigateway.envoyproxy.io/allow-mode-override": "true", }, }, - Spec: gwaiev1a2.InferencePoolSpec{ - 
EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"}, }, } @@ -1570,7 +1540,7 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) { }) t.Run("with both annotations", func(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -1579,14 +1549,8 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) { "aigateway.envoyproxy.io/allow-mode-override": "true", }, }, - Spec: gwaiev1a2.InferencePoolSpec{ - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"}, }, } @@ -1602,20 +1566,14 @@ func TestBuildHTTPFilterForInferencePool(t *testing.T) { // TestBuildExtProcClusterForInferencePoolEndpointPicker tests cluster building. 
func TestBuildExtProcClusterForInferencePoolEndpointPicker(t *testing.T) { - pool := &gwaiev1a2.InferencePool{ + pool := &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", }, - Spec: gwaiev1a2.InferencePoolSpec{ - TargetPortNumber: 8080, - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: "test-epp", - }, - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + TargetPorts: []gwaiev1.Port{{Number: 8080}}, + EndpointPickerRef: gwaiev1.EndpointPickerRef{Name: "test-epp"}, }, } @@ -1634,14 +1592,6 @@ func TestBuildExtProcClusterForInferencePoolEndpointPicker(t *testing.T) { buildExtProcClusterForInferencePoolEndpointPicker(nil) }) }) - - t.Run("nil ExtensionRef panics", func(t *testing.T) { - invalidPool := pool.DeepCopy() - invalidPool.Spec.ExtensionRef = nil - require.Panics(t, func() { - buildExtProcClusterForInferencePoolEndpointPicker(invalidPool) - }) - }) } // TestBuildClustersForInferencePoolEndpointPickers tests building clusters from existing clusters. 
diff --git a/internal/extensionserver/inferencepool.go b/internal/extensionserver/inferencepool.go index 6911bb34cb..aaef644a46 100644 --- a/internal/extensionserver/inferencepool.go +++ b/internal/extensionserver/inferencepool.go @@ -31,7 +31,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/utils/ptr" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/envoyproxy/ai-gateway/internal/internalapi" ) @@ -53,11 +53,11 @@ const ( allowModeOverrideAnnotation = "aigateway.envoyproxy.io/allow-mode-override" ) -func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.ExtensionResource) []*gwaiev1a2.InferencePool { +func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.ExtensionResource) []*gwaiev1.InferencePool { // Parse InferencePool resources from BackendExtensionResources. // BackendExtensionResources contains unstructured Kubernetes resources that were // referenced in the AIGatewayRoute's BackendRefs with non-empty Group and Kind fields. - var inferencePools []*gwaiev1a2.InferencePool + var inferencePools []*gwaiev1.InferencePool for _, resource := range extensionResources { // Unmarshal the unstructured bytes to get the Kubernetes resource. // The resource is stored as JSON bytes in the extension context. @@ -69,11 +69,11 @@ func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.E // Check if this is an InferencePool resource from the Gateway API Inference Extension. // We only process InferencePool resources; other extension resources are ignored. - if unstructuredObj.GetAPIVersion() == "inference.networking.x-k8s.io/v1alpha2" && + if unstructuredObj.GetAPIVersion() == "inference.networking.k8s.io/v1" && unstructuredObj.GetKind() == "InferencePool" { // Convert unstructured object to strongly-typed InferencePool. 
// This allows us to access the InferencePool's spec fields safely. - var pool gwaiev1a2.InferencePool + var pool gwaiev1.InferencePool if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredObj.Object, &pool); err != nil { s.log.Error(err, "failed to convert unstructured to InferencePool", "name", unstructuredObj.GetName(), "namespace", unstructuredObj.GetNamespace()) @@ -87,7 +87,7 @@ func (s *Server) constructInferencePoolsFrom(extensionResources []*egextension.E } // getInferencePoolByMetadata returns the InferencePool from the cluster metadata. -func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool { +func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1.InferencePool { var metadata string if meta != nil && meta.FilterMetadata != nil { m, ok := meta.FilterMetadata[internalapi.InternalEndpointMetadataNamespace] @@ -112,7 +112,7 @@ func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool } processingBodyMode := result[4] allowModeOverride := result[5] - return &gwaiev1a2.InferencePool{ + return &gwaiev1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: ns, @@ -121,14 +121,10 @@ func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool allowModeOverrideAnnotation: allowModeOverride, }, }, - Spec: gwaiev1a2.InferencePoolSpec{ - EndpointPickerConfig: gwaiev1a2.EndpointPickerConfig{ - ExtensionRef: &gwaiev1a2.Extension{ - ExtensionReference: gwaiev1a2.ExtensionReference{ - Name: gwaiev1a2.ObjectName(serviceName), - PortNumber: ptr.To(gwaiev1a2.PortNumber(port)), - }, - }, + Spec: gwaiev1.InferencePoolSpec{ + EndpointPickerRef: gwaiev1.EndpointPickerRef{ + Name: gwaiev1.ObjectName(serviceName), + Port: ptr.To(gwaiev1.Port{Number: gwaiev1.PortNumber(port)}), }, }, } @@ -136,19 +132,19 @@ func getInferencePoolByMetadata(meta *corev3.Metadata) *gwaiev1a2.InferencePool // buildMetadataForInferencePool adds InferencePool metadata to the cluster for 
reference by other components. // encoded as a string in the format: "namespace/name/serviceName/port". -func buildEPPMetadataForCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1a2.InferencePool) { +func buildEPPMetadataForCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1.InferencePool) { // Initialize cluster metadata structure if not present. buildEPPMetadata(cluster.Metadata, inferencePool) } // buildMetadataForInferencePool adds InferencePool metadata to the route for reference by other components. -func buildEPPMetadataForRoute(route *routev3.Route, inferencePool *gwaiev1a2.InferencePool) { +func buildEPPMetadataForRoute(route *routev3.Route, inferencePool *gwaiev1.InferencePool) { // Initialize route metadata structure if not present. buildEPPMetadata(route.Metadata, inferencePool) } // buildEPPMetadata adds InferencePool metadata to the given metadata structure. -func buildEPPMetadata(metadata *corev3.Metadata, inferencePool *gwaiev1a2.InferencePool) { +func buildEPPMetadata(metadata *corev3.Metadata, inferencePool *gwaiev1.InferencePool) { // Initialize cluster metadata structure if not present. if metadata == nil { metadata = &corev3.Metadata{} @@ -178,7 +174,7 @@ func buildEPPMetadata(metadata *corev3.Metadata, inferencePool *gwaiev1a2.Infere clusterRefInferencePool( inferencePool.Namespace, inferencePool.Name, - string(inferencePool.Spec.ExtensionRef.Name), + string(inferencePool.Spec.EndpointPickerRef.Name), portForInferencePool(inferencePool), processingBodyMode, allowModeOverride, @@ -201,13 +197,10 @@ func buildClustersForInferencePoolEndpointPickers(clusters []*clusterv3.Cluster) // buildExtProcClusterForInferencePoolEndpointPicker builds and returns a "STRICT_DNS" cluster // for connecting to the InferencePool's endpoint picker service. 
-func buildExtProcClusterForInferencePoolEndpointPicker(pool *gwaiev1a2.InferencePool) *clusterv3.Cluster { +func buildExtProcClusterForInferencePoolEndpointPicker(pool *gwaiev1.InferencePool) *clusterv3.Cluster { if pool == nil { panic("InferencePool cannot be nil") } - if pool.Spec.ExtensionRef == nil { - panic("InferencePool ExtensionRef cannot be nil") - } name := clusterNameForInferencePool(pool) c := &clusterv3.Cluster{ @@ -276,7 +269,7 @@ func buildExtProcClusterForInferencePoolEndpointPicker(pool *gwaiev1a2.Inference } // buildInferencePoolHTTPFilter returns a HTTP filter for InferencePool. -func buildInferencePoolHTTPFilter(pool *gwaiev1a2.InferencePool) *httpconnectionmanagerv3.HttpFilter { +func buildInferencePoolHTTPFilter(pool *gwaiev1.InferencePool) *httpconnectionmanagerv3.HttpFilter { poolFilter := buildHTTPFilterForInferencePool(pool) return &httpconnectionmanagerv3.HttpFilter{ Name: httpFilterNameForInferencePool(pool), @@ -285,7 +278,7 @@ func buildInferencePoolHTTPFilter(pool *gwaiev1a2.InferencePool) *httpconnection } // buildHTTPFilterForInferencePool returns the HTTP filter for the given InferencePool. -func buildHTTPFilterForInferencePool(pool *gwaiev1a2.InferencePool) *extprocv3.ExternalProcessor { +func buildHTTPFilterForInferencePool(pool *gwaiev1.InferencePool) *extprocv3.ExternalProcessor { // Read processing body mode from annotations, default to "duplex" (FULL_DUPLEX_STREAMED) processingBodyMode := getProcessingBodyModeFromAnnotations(pool) @@ -317,7 +310,7 @@ func buildHTTPFilterForInferencePool(pool *gwaiev1a2.InferencePool) *extprocv3.E // getProcessingBodyModeFromAnnotations reads the processing body mode from InferencePool annotations. // Returns FULL_DUPLEX_STREAMED for "duplex" (default) or BUFFERED for "buffered". 
-func getProcessingBodyModeFromAnnotations(pool *gwaiev1a2.InferencePool) extprocv3.ProcessingMode_BodySendMode { +func getProcessingBodyModeFromAnnotations(pool *gwaiev1.InferencePool) extprocv3.ProcessingMode_BodySendMode { annotations := pool.GetAnnotations() if annotations == nil { return extprocv3.ProcessingMode_FULL_DUPLEX_STREAMED // default to duplex @@ -341,7 +334,7 @@ func getProcessingBodyModeFromAnnotations(pool *gwaiev1a2.InferencePool) extproc // getAllowModeOverrideFromAnnotations reads the allow mode override setting from InferencePool annotations. // Returns false by default, true if annotation is set to "true". -func getAllowModeOverrideFromAnnotations(pool *gwaiev1a2.InferencePool) bool { +func getAllowModeOverrideFromAnnotations(pool *gwaiev1.InferencePool) bool { annotations := pool.GetAnnotations() if annotations == nil { return false // default to false @@ -356,7 +349,7 @@ func getAllowModeOverrideFromAnnotations(pool *gwaiev1a2.InferencePool) bool { } // getProcessingBodyModeStringFromAnnotations reads the processing body mode from InferencePool annotations. -func getProcessingBodyModeStringFromAnnotations(pool *gwaiev1a2.InferencePool) string { +func getProcessingBodyModeStringFromAnnotations(pool *gwaiev1.InferencePool) string { annotations := pool.GetAnnotations() if annotations == nil { return "duplex" // default to duplex @@ -371,7 +364,7 @@ func getProcessingBodyModeStringFromAnnotations(pool *gwaiev1a2.InferencePool) s } // getAllowModeOverrideStringFromAnnotations reads the allow mode override setting from InferencePool annotations. 
-func getAllowModeOverrideStringFromAnnotations(pool *gwaiev1a2.InferencePool) string { +func getAllowModeOverrideStringFromAnnotations(pool *gwaiev1.InferencePool) string { annotations := pool.GetAnnotations() if annotations == nil { return "false" // default to false @@ -386,39 +379,39 @@ func getAllowModeOverrideStringFromAnnotations(pool *gwaiev1a2.InferencePool) st } // authorityForInferencePool formats the gRPC authority based on the given InferencePool. -func authorityForInferencePool(pool *gwaiev1a2.InferencePool) string { +func authorityForInferencePool(pool *gwaiev1.InferencePool) string { ns := pool.GetNamespace() - svc := pool.Spec.ExtensionRef.Name + svc := pool.Spec.EndpointPickerRef.Name return fmt.Sprintf("%s.%s.svc:%d", svc, ns, portForInferencePool(pool)) } // dnsNameForInferencePool formats the DNS name based on the given InferencePool. -func dnsNameForInferencePool(pool *gwaiev1a2.InferencePool) string { +func dnsNameForInferencePool(pool *gwaiev1.InferencePool) string { ns := pool.GetNamespace() - svc := pool.Spec.ExtensionRef.Name + svc := pool.Spec.EndpointPickerRef.Name return fmt.Sprintf("%s.%s.svc", svc, ns) } // portForInferencePool returns the port number for the given InferencePool. -func portForInferencePool(pool *gwaiev1a2.InferencePool) uint32 { - if p := pool.Spec.ExtensionRef.PortNumber; p == nil { +func portForInferencePool(pool *gwaiev1.InferencePool) uint32 { + if p := pool.Spec.EndpointPickerRef.Port; p == nil { return defaultEndpointPickerPort } - portNumber := *pool.Spec.ExtensionRef.PortNumber + portNumber := pool.Spec.EndpointPickerRef.Port.Number if portNumber < 0 || portNumber > 65535 { return defaultEndpointPickerPort // fallback to default port. } // Safe conversion: portNumber is validated to be in range [0, 65535]. - return uint32(portNumber) // #nosec G115 + return uint32(portNumber) // #nosec G115 } // clusterNameForInferencePool returns the name of the ext_proc cluster for the given InferencePool. 
-func clusterNameForInferencePool(pool *gwaiev1a2.InferencePool) string { +func clusterNameForInferencePool(pool *gwaiev1.InferencePool) string { return fmt.Sprintf("envoy.clusters.endpointpicker_%s_%s_ext_proc", pool.GetName(), pool.GetNamespace()) } // httpFilterNameForInferencePool returns the name of the ext_proc cluster for the given InferencePool. -func httpFilterNameForInferencePool(pool *gwaiev1a2.InferencePool) string { +func httpFilterNameForInferencePool(pool *gwaiev1.InferencePool) string { return fmt.Sprintf("envoy.filters.http.ext_proc/endpointpicker/%s_%s_ext_proc", pool.GetName(), pool.GetNamespace()) } @@ -440,7 +433,7 @@ func findHCM(filterChain *listenerv3.FilterChain) (*httpconnectionmanagerv3.Http } // Tries to find the inference pool ext proc filter in the provided chain. -func searchInferencePoolInFilterChain(pool *gwaiev1a2.InferencePool, chain []*httpconnectionmanagerv3.HttpFilter) (*extprocv3.ExternalProcessor, int, error) { +func searchInferencePoolInFilterChain(pool *gwaiev1.InferencePool, chain []*httpconnectionmanagerv3.HttpFilter) (*extprocv3.ExternalProcessor, int, error) { for i, filter := range chain { if filter.Name == httpFilterNameForInferencePool(pool) { ep := new(extprocv3.ExternalProcessor) diff --git a/internal/extensionserver/post_cluster_modify.go b/internal/extensionserver/post_cluster_modify.go index f33940edd2..6accb327d1 100644 --- a/internal/extensionserver/post_cluster_modify.go +++ b/internal/extensionserver/post_cluster_modify.go @@ -13,7 +13,7 @@ import ( egextension "github.com/envoyproxy/gateway/proto/extension" clusterv3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" "google.golang.org/protobuf/types/known/durationpb" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/envoyproxy/ai-gateway/internal/internalapi" ) @@ -64,7 +64,7 @@ func (s *Server) PostClusterModify(_ context.Context, req 
*egextension.PostClust // // The ORIGINAL_DST cluster type tells Envoy to route requests to the destination specified // in the x-gateway-destination-endpoint header, enabling dynamic endpoint selection by the EPP. -func (s *Server) handleInferencePoolCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1a2.InferencePool) { +func (s *Server) handleInferencePoolCluster(cluster *clusterv3.Cluster, inferencePool *gwaiev1.InferencePool) { // Configure cluster for ORIGINAL_DST with header-based load balancing. // ORIGINAL_DST type allows Envoy to route to destinations specified in HTTP headers. cluster.ClusterDiscoveryType = &clusterv3.Cluster_Type{Type: clusterv3.Cluster_ORIGINAL_DST} diff --git a/internal/extensionserver/post_translate_modify.go b/internal/extensionserver/post_translate_modify.go index aed9511fe8..1595a66bf5 100644 --- a/internal/extensionserver/post_translate_modify.go +++ b/internal/extensionserver/post_translate_modify.go @@ -30,7 +30,7 @@ import ( "google.golang.org/protobuf/types/known/wrapperspb" apierrors "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/client" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1" "github.com/envoyproxy/ai-gateway/internal/internalapi" @@ -364,14 +364,14 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener, // inferencePoolRoutes builds a matrix of route configs and the inference pools they use. 
routeNameToRoute := make(map[string]*routev3.RouteConfiguration) - routeNameToVHRouteNameToInferencePool := make(map[string]map[string]*gwaiev1a2.InferencePool) + routeNameToVHRouteNameToInferencePool := make(map[string]map[string]*gwaiev1.InferencePool) for _, routeCfg := range routes { routeNameToRoute[routeCfg.Name] = routeCfg for _, vh := range routeCfg.VirtualHosts { for _, route := range vh.Routes { if pool := getInferencePoolByMetadata(route.Metadata); pool != nil { if routeNameToVHRouteNameToInferencePool[routeCfg.Name] == nil { - routeNameToVHRouteNameToInferencePool[routeCfg.Name] = make(map[string]*gwaiev1a2.InferencePool) + routeNameToVHRouteNameToInferencePool[routeCfg.Name] = make(map[string]*gwaiev1.InferencePool) } routeNameToVHRouteNameToInferencePool[routeCfg.Name][route.Name] = pool } @@ -380,7 +380,7 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener, } // listenerToInferencePools builds a matrix of listeners and the inference pools they use. - listenerToInferencePools := make(map[string][]*gwaiev1a2.InferencePool) + listenerToInferencePools := make(map[string][]*gwaiev1.InferencePool) for listener, routeCfgNames := range listenerNameToRouteNames { for _, name := range routeCfgNames { if routeNameToRoute[name] == nil { @@ -391,7 +391,7 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener, } for _, pool := range routeNameToVHRouteNameToInferencePool[name] { if listenerToInferencePools[listener] == nil { - listenerToInferencePools[listener] = make([]*gwaiev1a2.InferencePool, 0) + listenerToInferencePools[listener] = make([]*gwaiev1.InferencePool, 0) } listenerToInferencePools[listener] = append(listenerToInferencePools[listener], pool) } @@ -433,7 +433,7 @@ func (s *Server) maybeModifyListenerAndRoutes(listeners []*listenerv3.Listener, } // patchListenerWithInferencePoolFilters adds the necessary HTTP filters to the listener to support InferencePool backends. 
-func (s *Server) patchListenerWithInferencePoolFilters(listener *listenerv3.Listener, inferencePools []*gwaiev1a2.InferencePool) { +func (s *Server) patchListenerWithInferencePoolFilters(listener *listenerv3.Listener, inferencePools []*gwaiev1.InferencePool) { // First, get the filter chains from the listener. filterChains := listener.GetFilterChains() defaultFC := listener.DefaultFilterChain @@ -476,8 +476,8 @@ func (s *Server) patchListenerWithInferencePoolFilters(listener *listenerv3.List } // patchVirtualHostWithInferencePool adds the necessary per-route configuration to disable. -func (s *Server) patchVirtualHostWithInferencePool(vh *routev3.VirtualHost, inferencePools []*gwaiev1a2.InferencePool) { - inferenceMatrix := make(map[string]*gwaiev1a2.InferencePool) +func (s *Server) patchVirtualHostWithInferencePool(vh *routev3.VirtualHost, inferencePools []*gwaiev1.InferencePool) { + inferenceMatrix := make(map[string]*gwaiev1.InferencePool) for _, pool := range inferencePools { inferenceMatrix[httpFilterNameForInferencePool(pool)] = pool } diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml index 7802b8c694..d1085c795b 100644 --- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml +++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml @@ -451,7 +451,7 @@ spec: description: |- Group is the group of the backend resource. When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend). - Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources. + Currently, only "inference.networking.k8s.io" is supported for InferencePool resources. 
maxLength: 253 pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string @@ -505,9 +505,9 @@ spec: - message: group and kind must be specified together rule: '!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))' - - message: only InferencePool from inference.networking.x-k8s.io + - message: only InferencePool from inference.networking.k8s.io group is supported - rule: '!has(self.group) || (self.group == ''inference.networking.x-k8s.io'' + rule: '!has(self.group) || (self.group == ''inference.networking.k8s.io'' && self.kind == ''InferencePool'')' maxItems: 128 type: array diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml index 7802b8c694..d1085c795b 100644 --- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml +++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_aigatewayroutes.yaml @@ -451,7 +451,7 @@ spec: description: |- Group is the group of the backend resource. When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend). - Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources. + Currently, only "inference.networking.k8s.io" is supported for InferencePool resources. 
maxLength: 253 pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string @@ -505,9 +505,9 @@ spec: - message: group and kind must be specified together rule: '!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))' - - message: only InferencePool from inference.networking.x-k8s.io + - message: only InferencePool from inference.networking.k8s.io group is supported - rule: '!has(self.group) || (self.group == ''inference.networking.x-k8s.io'' + rule: '!has(self.group) || (self.group == ''inference.networking.k8s.io'' && self.kind == ''InferencePool'')' maxItems: 128 type: array diff --git a/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml b/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml index 4d62d63483..ac37d2ddc5 100644 --- a/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml +++ b/manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml @@ -35,7 +35,7 @@ rules: verbs: - '*' - apiGroups: - - inference.networking.x-k8s.io + - inference.networking.k8s.io resources: - '*' verbs: diff --git a/manifests/envoy-gateway-config/rbac.yaml b/manifests/envoy-gateway-config/rbac.yaml index 79eb2b7c44..beb515e667 100644 --- a/manifests/envoy-gateway-config/rbac.yaml +++ b/manifests/envoy-gateway-config/rbac.yaml @@ -20,7 +20,7 @@ rules: - "list" - "watch" - apiGroups: - - "inference.networking.x-k8s.io" + - "inference.networking.k8s.io" resources: - "inferencepools" verbs: diff --git a/site/blog/2025/2025-07-30-epp-introduction.md b/site/blog/2025/2025-07-30-epp-introduction.md index 38c7a60670..17fe960f23 100644 --- a/site/blog/2025/2025-07-30-epp-introduction.md +++ b/site/blog/2025/2025-07-30-epp-introduction.md @@ -90,7 +90,7 @@ spec: namespace: default rules: - backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct namespace: default @@ -133,7 +133,7 @@ spec: name: x-ai-eg-model value: 
meta-llama/Llama-3.1-8B-Instruct backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct - matches: @@ -142,7 +142,7 @@ spec: name: x-ai-eg-model value: mistral:latest backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: mistral - matches: @@ -201,11 +201,11 @@ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extens kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/vllm/sim-deployment.yaml ``` -### 3\. Configure InferenceModel and InferencePool +### 3\. Configure InferenceObjective and InferencePool ```yaml -apiVersion: inference.networking.x-k8s.io/v1alpha2 -kind: InferenceModel +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferenceObjective metadata: name: base-model spec: @@ -214,7 +214,7 @@ spec: poolRef: name: vllm-llama3-8b-instruct --- -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: vllm-llama3-8b-instruct diff --git a/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio b/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio index 002043a215..fe265eba9a 100644 --- a/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio +++ b/site/blog/2025/diagram-sources/epp-blog-diagrams.drawio @@ -424,10 +424,10 @@ - + - + @@ -462,10 +462,10 @@ - + - + diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx index 3387704a34..56f2a5abf3 100644 --- a/site/docs/api/api.mdx +++ b/site/docs/api/api.mdx @@ -552,7 +552,7 @@ It can reference either an AIServiceBackend or an InferencePool resource. name="group" type="string" required="false" - description="Group is the group of the backend resource.
When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
Currently, only `inference.networking.x-k8s.io` is supported for InferencePool resources." + description="Group is the group of the backend resource.
When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
Currently, only `inference.networking.k8s.io` is supported for InferencePool resources." /> **Note**: These deployments create the `vllm-llama3-8b-instruct` InferencePool and related resources that are referenced in the AIGatewayRoute configuration below. -## Step 3: Create EndpointPicker Resources +## Step 3: Create Custom InferencePool Resources -Create the base resources for the example, including additional inference backends: +Create additional inference backends with custom EndpointPicker configuration: ```yaml cat < **Note**: This deployment creates the `vllm-llama3-8b-instruct` InferencePool and related resources that are referenced in the HTTPRoute configuration below. -## Step 3: Create InferenceModel +## Step 3: Create InferenceObjective -Create an InferenceModel resource to define the model configuration: +Create an InferenceObjective resource to define the model configuration: ```bash -kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencemodel.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.0.1/config/manifests/inferenceobjective.yaml ``` ## Step 4: Create InferencePool Resources @@ -60,14 +60,15 @@ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extens Deploy the InferencePool and related resources: ```bash -kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencepool-resources.yaml +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.1/config/manifests/inferencepool-resources.yaml ``` This creates: - InferencePool resource defining the endpoint selection criteria -- Endpoint Picker Provider (EPP) deployment for intelligent routing +- Endpoint Picker Provider (EPP) deployment for intelligent routing with advanced scheduling plugins - Associated services and configurations +- RBAC 
permissions for accessing InferencePool and Pod resources ## Step 5: Configure Gateway and HTTPRoute @@ -107,10 +108,11 @@ spec: namespace: default rules: - backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-llama3-8b-instruct namespace: default + port: 8080 weight: 1 matches: - path: diff --git a/tests/crdcel/main_test.go b/tests/crdcel/main_test.go index 408352958e..8130622875 100644 --- a/tests/crdcel/main_test.go +++ b/tests/crdcel/main_test.go @@ -51,7 +51,7 @@ func TestAIGatewayRoutes(t *testing.T) { }, { name: "inference_pool_unsupported_group.yaml", - expErr: "spec.rules[0].backendRefs[0]: Invalid value: \"object\": only InferencePool from inference.networking.x-k8s.io group is supported", + expErr: "spec.rules[0].backendRefs[0]: Invalid value: \"object\": only InferencePool from inference.networking.k8s.io group is supported", }, } { t.Run(tc.name, func(t *testing.T) { diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml index 285042b651..fe15989506 100644 --- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml +++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_basic.yaml @@ -13,11 +13,8 @@ spec: parentRefs: - name: gateway rules: - - matches: - - path: - type: PathPrefix - value: /v1/chat/completions + - matches: [] backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: my-inference-pool diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml index cc130935e6..2038b980d5 100644 --- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml +++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_mixed_backends.yaml @@ -24,5 +24,5 @@ spec: backendRefs: - name: ai-service-backend - name: 
vllm-llama3-8b-instruct - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io kind: InferencePool diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml index 64a6c0949c..3931176aee 100644 --- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml +++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_multiple.yaml @@ -23,8 +23,8 @@ spec: value: llama3-8b backendRefs: - name: vllm-llama3-8b-instruct-1 - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io kind: InferencePool - name: vllm-llama3-8b-instruct-2 - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io kind: InferencePool diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml index dfd9213f37..145d186278 100644 --- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml +++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_partial_ref.yaml @@ -23,5 +23,5 @@ spec: value: llama3-8b backendRefs: - name: vllm-llama3-8b-instruct - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io # Missing kind field diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml index 12bee37490..4b927c621d 100644 --- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml +++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_unsupported_group.yaml @@ -3,7 +3,7 @@ # The full text of the Apache license is available in the LICENSE file at # the root of the repo. 
-# This should fail validation: only InferencePool from inference.networking.x-k8s.io group is supported +# This should fail validation: only InferencePool from inference.networking.k8s.io group is supported apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIGatewayRoute diff --git a/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml b/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml index 27f3293376..d512c49371 100644 --- a/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml +++ b/tests/crdcel/testdata/aigatewayroutes/inference_pool_valid.yaml @@ -21,5 +21,5 @@ spec: value: llama3-8b backendRefs: - name: vllm-llama3-8b-instruct - group: inference.networking.x-k8s.io + group: inference.networking.k8s.io kind: InferencePool diff --git a/tests/e2e-inference-extension/conformance_test.go b/tests/e2e-inference-extension/conformance_test.go index f54cad3e6e..a67e879827 100644 --- a/tests/e2e-inference-extension/conformance_test.go +++ b/tests/e2e-inference-extension/conformance_test.go @@ -6,6 +6,8 @@ package e2e import ( + "fmt" + "os" "testing" "time" @@ -40,10 +42,19 @@ func TestGatewayAPIInferenceExtension(t *testing.T) { config.SetupTimeoutConfig(&defaultTimeoutConfig) options.TimeoutConfig = defaultTimeoutConfig options.GatewayClassName = "inference-pool" - // enable EPPUnAvaliableFailOpen after https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/1265 merged. 
- options.SkipTests = []string{ - "EppUnAvailableFailOpen", - } + options.SkipTests = []string{} + + // Setup cleanup to print report even if test fails + t.Cleanup(func() { + if content, err := os.ReadFile(options.ReportOutputPath); err != nil { + t.Logf("Failed to read conformance report file %s: %v", options.ReportOutputPath, err) + } else { + fmt.Printf("\n=== CONFORMANCE TEST REPORT (CLEANUP) ===\n") + fmt.Printf("Report file: %s\n", options.ReportOutputPath) + fmt.Printf("Content:\n%s\n", string(content)) + fmt.Printf("=== END OF REPORT (CLEANUP) ===\n\n") + } + }) gie.RunConformanceWithOptions(t, options) } diff --git a/tests/e2e-inference-extension/inference_pool_test.go b/tests/e2e-inference-extension/inference_pool_test.go index 5f129d45ca..306d0bf2cd 100644 --- a/tests/e2e-inference-extension/inference_pool_test.go +++ b/tests/e2e-inference-extension/inference_pool_test.go @@ -18,7 +18,7 @@ import ( "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/envoyproxy/ai-gateway/tests/internal/e2elib" ) @@ -176,14 +176,14 @@ func testInferenceGatewayConnectivity(t *testing.T, egSelector, body string, add } // getInferencePoolStatus retrieves the status of an InferencePool resource. 
-func getInferencePoolStatus(ctx context.Context, namespace, name string) (*gwaiev1a2.InferencePoolStatus, error) { +func getInferencePoolStatus(ctx context.Context, namespace, name string) (*gwaiev1.InferencePoolStatus, error) { cmd := exec.CommandContext(ctx, "kubectl", "get", "inferencepool", name, "-n", namespace, "-o", "json") out, err := cmd.Output() if err != nil { return nil, fmt.Errorf("failed to get InferencePool %s/%s: %w", namespace, name, err) } - var inferencePool gwaiev1a2.InferencePool + var inferencePool gwaiev1.InferencePool if err := json.Unmarshal(out, &inferencePool); err != nil { return nil, fmt.Errorf("failed to unmarshal InferencePool: %w", err) } @@ -207,10 +207,10 @@ func requireInferencePoolStatusValid(t *testing.T, namespace, inferencePoolName, } // Find the parent status for the expected Gateway. - var foundParent *gwaiev1a2.PoolStatus + var foundParent *gwaiev1.ParentStatus for i := range status.Parents { parent := &status.Parents[i] - if string(parent.GatewayRef.Name) == expectedGatewayName { + if string(parent.ParentRef.Name) == expectedGatewayName { foundParent = parent break } @@ -222,23 +222,23 @@ func requireInferencePoolStatusValid(t *testing.T, namespace, inferencePoolName, } // Validate the GatewayRef fields. 
- if foundParent.GatewayRef.Group == nil || string(*foundParent.GatewayRef.Group) != "gateway.networking.k8s.io" { - t.Logf("InferencePool %s parent GatewayRef has incorrect group: %v", inferencePoolName, foundParent.GatewayRef.Group) + if foundParent.ParentRef.Group == nil || string(*foundParent.ParentRef.Group) != "gateway.networking.k8s.io" { + t.Logf("InferencePool %s parent GatewayRef has incorrect group: %v", inferencePoolName, foundParent.ParentRef.Group) return false } - if foundParent.GatewayRef.Kind == nil || string(*foundParent.GatewayRef.Kind) != "Gateway" { - t.Logf("InferencePool %s parent GatewayRef has incorrect kind: %v", inferencePoolName, foundParent.GatewayRef.Kind) + if string(foundParent.ParentRef.Kind) != "Gateway" { + t.Logf("InferencePool %s parent GatewayRef has incorrect kind: %v", inferencePoolName, foundParent.ParentRef.Kind) return false } - if string(foundParent.GatewayRef.Name) != expectedGatewayName { - t.Logf("InferencePool %s parent GatewayRef has incorrect name: %s (expected %s)", inferencePoolName, foundParent.GatewayRef.Name, expectedGatewayName) + if string(foundParent.ParentRef.Name) != expectedGatewayName { + t.Logf("InferencePool %s parent GatewayRef has incorrect name: %s (expected %s)", inferencePoolName, foundParent.ParentRef.Name, expectedGatewayName) return false } - if foundParent.GatewayRef.Namespace == nil || string(*foundParent.GatewayRef.Namespace) != namespace { - t.Logf("InferencePool %s parent GatewayRef has incorrect namespace: %v (expected %s)", inferencePoolName, foundParent.GatewayRef.Namespace, namespace) + if string(foundParent.ParentRef.Namespace) != namespace { + t.Logf("InferencePool %s parent GatewayRef has incorrect namespace: %v (expected %s)", inferencePoolName, foundParent.ParentRef.Namespace, namespace) return false } diff --git a/tests/internal/e2elib/e2elib.go b/tests/internal/e2elib/e2elib.go index 61efab6d32..b9f5b3aed3 100644 --- a/tests/internal/e2elib/e2elib.go +++ 
b/tests/internal/e2elib/e2elib.go @@ -347,7 +347,7 @@ func CleanupKindCluster(testsFailed bool, clusterName string) { } func installInferenceExtensionCRD(ctx context.Context) (err error) { - const infExtURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml" + const infExtURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml" return KubectlApplyManifest(ctx, infExtURL) } @@ -357,12 +357,12 @@ func installVLLMDeployment(ctx context.Context) (err error) { } func installInferenceModel(ctx context.Context) (err error) { - const inferenceModelURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencemodel.yaml" + const inferenceModelURL = "https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.0.1/config/manifests/inferenceobjective.yaml" return KubectlApplyManifest(ctx, inferenceModelURL) } func installInferencePoolResources(ctx context.Context) (err error) { - const inferencePoolURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v0.5.1/config/manifests/inferencepool-resources.yaml" + const inferencePoolURL = "https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.1/config/manifests/inferencepool-resources.yaml" return KubectlApplyManifest(ctx, inferencePoolURL) }