move inferenceObjectives to top level and clean up template

Gregory-Pereira · Gregory-Pereira · commit ff978186aaee · 2025-12-15T07:12:12.000-08:00
Signed-off-by: greg pereira <grpereir@redhat.com>
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -225,7 +225,6 @@ The following table list the configurable parameters of the chart.
 | `inferencePool.targetPortNumber`                           | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000.                                                                                                                             |
 | `inferencePool.modelServerType`                            | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm.                                                                                                                                             |
 | `inferencePool.modelServers.matchLabels`                   | Label selector to match vllm backends managed by the inference pool.                                                                                                                                                                               |
-| `inferencePool.priority`                                   | A priority that will be applied to the inferencepool through an inferenceobjective.                                                                                                                                                                |
 | `inferenceExtension.replicas`                              | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`.                                                                                           |
 | `inferenceExtension.image.name`                            | Name of the container image used for the endpoint picker.                                                                                                                                                                                          |
 | `inferenceExtension.image.hub`                             | Registry URL where the endpoint picker image is hosted.                                                                                                                                                                                            |
@@ -264,6 +263,7 @@ The following table list the configurable parameters of the chart.
 | `inferenceExtension.sidecar.volumeMounts`                  | List of volume mounts for the sidecar container. Optional.                                                                                                                                                                                         |
 | `inferenceExtension.sidecar.volumes`                       | List of volumes for the sidecar container. Optional.                                                                                                                                                                                               |
 | `inferenceExtension.sidecar.configMapData`                 | Custom key-value pairs to be included in a ConfigMap created for the sidecar container. Only used when `inferenceExtension.sidecar.enabled` is `true`. Optional.                                                                                   |
+| `inferenceObjectives`                                      | A list of names and priorities to create InferenceObjectives from that will be assigned to the inference pool                                                                                                                                      |
 | `provider.name`                                            | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`.                                                                                                                         |
 | `provider.gke.autopilot`                                   | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`.                                                                                                                       |
 
diff --git a/config/charts/inferencepool/templates/inferenceobjectives.yaml b/config/charts/inferencepool/templates/inferenceobjectives.yaml
@@ -1,8 +1,4 @@
-{{- range .Values.inferencePool.inferenceObjectives }}
-{{- $group := "inference.networking.k8s.io" -}}
-{{- if eq $.Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2" -}}
-{{- $group = "inference.networking.x-k8s.io" -}}
-{{- end -}}
+{{- range .Values.inferenceObjectives }}
 ---
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferenceObjective
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -167,15 +167,7 @@ inferencePool:
   # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now.
   targetPortNumber: 8000
 
-  # Optional: Define multiple InferenceObjectives for this InferencePool.
-  # Each InferenceObjective associates a name and priority with this InferencePool.
-  # Users reference these objectives by name in their request headers.
-  # inferenceObjectives:
-  #   - name: high-priority
-  #     priority: 1
-  #   - name: low-priority
-  #     priority: 5
-  inferenceObjectives: []
+
 
 # Options: ["gke", "istio", "none"]
 provider:
@@ -209,3 +201,13 @@ istio:
       # connectionPool:
       #   http:
       #     maxRequestsPerConnection: 256000
+
+
+# Optional: InferenceObjectives for this InferencePool. Each entry associates a
+# name and priority with the pool; users reference objectives by name in their
+# request headers. To define some, replace `[]` with entries like those below.
+inferenceObjectives: []
+#   - name: high-priority
+#     priority: 1
+#   - name: low-priority
+#     priority: 5