Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ linters:
alias: egv1a1
- pkg: github.com/envoyproxy/ai-gateway/api/v1alpha1
alias: aigv1a1
- pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2
alias: gwaiev1a2
- pkg: sigs.k8s.io/gateway-api-inference-extension/api/v1
alias: gwaiev1
- pkg: k8s.io/apimachinery/pkg/apis/meta/v1
alias: metav1
- pkg: k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1
Expand Down
4 changes: 2 additions & 2 deletions api/v1alpha1/ai_gateway_route.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ type AIGatewayRouteRule struct {
// It can reference either an AIServiceBackend or an InferencePool resource.
//
// +kubebuilder:validation:XValidation:rule="!has(self.group) && !has(self.kind) || (has(self.group) && has(self.kind))", message="group and kind must be specified together"
// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.x-k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.x-k8s.io group is supported"
// +kubebuilder:validation:XValidation:rule="!has(self.group) || (self.group == 'inference.networking.k8s.io' && self.kind == 'InferencePool')", message="only InferencePool from inference.networking.k8s.io group is supported"
type AIGatewayRouteRuleBackendRef struct {
// Name is the name of the backend resource.
// When Group and Kind are not specified, this refers to an AIServiceBackend.
Expand All @@ -278,7 +278,7 @@ type AIGatewayRouteRuleBackendRef struct {

// Group is the group of the backend resource.
// When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend).
// Currently, only "inference.networking.x-k8s.io" is supported for InferencePool resources.
// Currently, only "inference.networking.k8s.io" is supported for InferencePool resources.
//
// +optional
// +kubebuilder:validation:MaxLength=253
Expand Down
2 changes: 1 addition & 1 deletion api/v1alpha1/ai_gateway_route_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const (
defaultRequestTimeout gwapiv1.Duration = "60s"

// inferencePoolGroup is the API group for InferencePool resources.
inferencePoolGroup = "inference.networking.x-k8s.io"
inferencePoolGroup = "inference.networking.k8s.io"
// inferencePoolKind is the kind for InferencePool resources.
inferencePoolKind = "InferencePool"
)
Expand Down
2 changes: 1 addition & 1 deletion cmd/aigw/envoy-gateway-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ extensionApis:
enableBackend: true
extensionManager:
backendResources:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
version: v1
hooks:
Expand Down
22 changes: 11 additions & 11 deletions docs/proposals/003-epp-integration-proposal/proposal.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ When request goes to envoyproxy, it goes to the http filter chain, the ext-proc
The gRPC service info is pre-defined in [InferencePool](https://gateway-api-inference-extension.sigs.k8s.io/api-types/inferencepool/) extensionRef, giving an example below:

```
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -81,7 +81,7 @@ spec:
name: inference-gateway
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
matches:
Expand Down Expand Up @@ -209,7 +209,7 @@ This requires to expand the `AIGatewayRouteRuleBackendRef` with `BackendObjectRe
- When it matches vllm-llama3-8b-instruct goes to InferencePool `vllm-llama3-8b-instruct`

```
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -249,7 +249,7 @@ spec:
value: vllm-llama3-8b-instruct
backendRefs:
- name: vllm-llama3-8b-instruct
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
kind: InferencePool
```

Expand All @@ -269,7 +269,7 @@ This approach is preferred because InferencePool resources do not require Backen
- When it matches vllm-llama3-8b-instruct goes to AIServiceBackend `vllm-llama3-8b-instruct`

```yaml
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -319,7 +319,7 @@ spec:
name: OpenAI
backendRef:
name: vllm-llama3-8b-instruct
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
kind: InferencePool
```

Expand Down Expand Up @@ -384,7 +384,7 @@ It adds the the cluster with override_host loadBalancingPolicy, we can add the h
Take the configuration below as an example:

```yaml
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-llama3-8b-instruct
Expand Down Expand Up @@ -417,7 +417,7 @@ spec:
value: vllm-llama3-8b-instruct
backendRefs:
- name: vllm-llama3-8b-instruct
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
kind: InferencePool
```

Expand Down Expand Up @@ -582,7 +582,7 @@ spec:
name: x-ai-eg-model
value: meta-llama/Llama-3.1-8B-Instruct
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
- matches:
Expand All @@ -591,7 +591,7 @@ spec:
name: x-ai-eg-model
value: mistral:latest
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: mistral
- matches:
Expand Down Expand Up @@ -619,7 +619,7 @@ spec:
namespace: default
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
Expand Down
4 changes: 2 additions & 2 deletions examples/inference-pool/aigwroute.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
name: Authorization
value: sk-zyxwvutsrqponmlkjihgfedcba
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
- matches:
Expand All @@ -58,7 +58,7 @@ spec:
name: x-ai-eg-model
value: mistral:latest
backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: mistral
- matches:
Expand Down
16 changes: 8 additions & 8 deletions examples/inference-pool/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
initialDelaySeconds: 1
periodSeconds: 1
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: mistral
Expand All @@ -61,16 +61,16 @@ spec:
extensionRef:
name: mistral-epp
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceObjective
metadata:
name: mistral
namespace: default
spec:
modelName: mistral:latest
criticality: Critical
poolRef:
# Bind the InferenceModel to the InferencePool.
# Bind the InferenceObjective to the InferencePool.
name: mistral
---
apiVersion: v1
Expand Down Expand Up @@ -158,7 +158,7 @@ metadata:
namespace: default
data:
default-plugins.yaml: |
apiVersion: inference.networking.x-k8s.io/v1alpha1
apiVersion: inference.networking.x-k8s.io/v1alpha1
kind: EndpointPickerConfig
plugins:
- type: low-queue-filter
Expand Down Expand Up @@ -208,7 +208,7 @@ data:
- pluginRef: low-latency-filter
- pluginRef: random-picker
plugins-v2.yaml: |
apiVersion: inference.networking.x-k8s.io/v1alpha1
apiVersion: inference.networking.x-k8s.io/v1alpha1
kind: EndpointPickerConfig
plugins:
- type: queue-scorer
Expand Down Expand Up @@ -238,10 +238,10 @@ apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: pod-read
rules:
- apiGroups: ["inference.networking.x-k8s.io"]
- apiGroups: ["inference.networking.k8s.io"]
resources: ["inferencepools"]
verbs: ["get", "watch", "list"]
- apiGroups: ["inference.networking.x-k8s.io"]
- apiGroups: ["inference.networking.k8s.io"]
resources: ["inferenceobjectives"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
Expand Down
2 changes: 1 addition & 1 deletion examples/inference-pool/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ data:
enableBackend: true
extensionManager:
backendResources:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
version: v1
hooks:
Expand Down
2 changes: 1 addition & 1 deletion examples/inference-pool/httproute.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ spec:
namespace: default
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-llama3-8b-instruct
namespace: default
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ require (
k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d
sigs.k8s.io/controller-runtime v0.22.1
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c
sigs.k8s.io/gateway-api-inference-extension v0.5.1
sigs.k8s.io/gateway-api-inference-extension v1.0.0
sigs.k8s.io/yaml v1.6.0
)

Expand Down
15 changes: 9 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJ
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI=
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
Expand Down Expand Up @@ -344,10 +347,10 @@ github.com/ohler55/ojg v1.26.10/go.mod h1:/Y5dGWkekv9ocnUixuETqiL58f+5pAsUfg5P8e
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=
github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
github.com/onsi/ginkgo/v2 v2.24.0 h1:obZz8LAnHicNdbBqvG3ytAFx8fgza+i1IDpBVcHT2YE=
github.com/onsi/ginkgo/v2 v2.24.0/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk=
github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY=
github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o=
github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
github.com/openai/openai-go/v2 v2.7.0 h1:/8MSFCXcasin7AyuWQ2au6FraXL71gzAs+VfbMv+J3k=
Expand Down Expand Up @@ -661,8 +664,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV
sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c h1:GS4VnGRV90GEUjrgQ2GT5ii6yzWj3KtgUg+sVMdhs5c=
sigs.k8s.io/gateway-api v1.3.1-0.20250527223622-54df0a899c1c/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
sigs.k8s.io/gateway-api-inference-extension v0.5.1 h1:OMpt4gKlPWkD+h5kHcZZVh4926kix2DSBPI7X5ntuCA=
sigs.k8s.io/gateway-api-inference-extension v0.5.1/go.mod h1:lki0jx1qysZSZT4Ai2BxuAcpx6G8g5oBgOGuuJzjy/k=
sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8=
sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/kubectl-validate v0.0.5-0.20250915070809-d2f2d68fba09 h1:JQbPOwLjSztom+aSDQIi6UZq8V0Gbv7BjAlYQSgycCI=
Expand Down
4 changes: 2 additions & 2 deletions internal/controller/ai_gateway_route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) {
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{
Name: "test-inference-pool",
Group: ptr.To("inference.networking.x-k8s.io"),
Group: ptr.To("inference.networking.k8s.io"),
Kind: ptr.To("InferencePool"),
Weight: ptr.To(int32(100)),
},
Expand All @@ -460,7 +460,7 @@ func Test_newHTTPRoute_InferencePool(t *testing.T) {

// Check the first rule (our InferencePool rule).
backendRef := httpRoute.Spec.Rules[0].BackendRefs[0]
require.Equal(t, "inference.networking.x-k8s.io", string(*backendRef.Group))
require.Equal(t, "inference.networking.k8s.io", string(*backendRef.Group))
require.Equal(t, "InferencePool", string(*backendRef.Kind))
require.Equal(t, "test-inference-pool", string(backendRef.Name))
require.Equal(t, "test-ns", string(*backendRef.Namespace))
Expand Down
8 changes: 4 additions & 4 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
gwaiev1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
gwaiev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
gwapiv1a3 "sigs.k8s.io/gateway-api/apis/v1alpha3"
gwapiv1b1 "sigs.k8s.io/gateway-api/apis/v1beta1"
Expand All @@ -50,7 +50,7 @@ func init() {
utilruntime.Must(gwapiv1.Install(Scheme))
utilruntime.Must(gwapiv1a3.Install(Scheme))
utilruntime.Must(gwapiv1b1.Install(Scheme))
utilruntime.Must(gwaiev1a2.Install(Scheme))
utilruntime.Must(gwaiev1.Install(Scheme))
}

// Scheme contains the necessary schemes for the AI Gateway.
Expand Down Expand Up @@ -161,7 +161,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con
if err != nil {
return fmt.Errorf("failed to create CRD client for inference extension: %w", err)
}
const inferencePoolCRD = "inferencepools.inference.networking.x-k8s.io"
const inferencePoolCRD = "inferencepools.inference.networking.k8s.io"
if _, crdErr := crdClient.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, inferencePoolCRD, metav1.GetOptions{}); crdErr != nil {
if apierrors.IsNotFound(crdErr) {
logger.Info("InferencePool CRD not found, skipping InferencePool controller. " +
Expand All @@ -173,7 +173,7 @@ func StartControllers(ctx context.Context, mgr manager.Manager, config *rest.Con
// CRD exists, create the controller.
inferencePoolC := NewInferencePoolController(c, kubernetes.NewForConfigOrDie(config), logger.
WithName("inference-pool"))
if err = TypedControllerBuilderForCRD(mgr, &gwaiev1a2.InferencePool{}).
if err = TypedControllerBuilderForCRD(mgr, &gwaiev1.InferencePool{}).
Watches(&gwapiv1.Gateway{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.gatewayEventHandler)).
Watches(&aigv1a1.AIGatewayRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.aiGatewayRouteEventHandler)).
Watches(&gwapiv1.HTTPRoute{}, handler.EnqueueRequestsFromMapFunc(inferencePoolC.httpRouteEventHandler)).
Expand Down
Loading
Loading