Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions config/charts/inferencepool/templates/inferenceobjective.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
{{- /*
Renders one InferenceObjective per entry of
.Values.inferencePool.inferenceObjectives. Each entry supplies `name`
(used as metadata.name) and `priority` (used as spec.priority). Every
objective points at the InferencePool named after the release via poolRef.

Inside `range` the dot is the current list entry, so chart-level data is
read through the root context `$` ($.Values, $.Release, and the input to
the labels helper).

poolRef.group defaults to "inference.networking.k8s.io" and switches to
"inference.networking.x-k8s.io" when the pool apiVersion is the v1alpha2
one.

The inner `if` is closed with a plain `end` (no right-hand trim marker) on
purpose: a right trim would swallow the newline in front of the document
separator and glue it onto the last line of the previous objective as soon
as the list holds more than one entry.
*/ -}}
{{- range .Values.inferencePool.inferenceObjectives }}
{{- $group := "inference.networking.k8s.io" -}}
{{- if eq $.Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2" -}}
{{- $group = "inference.networking.x-k8s.io" -}}
{{- end }}
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceObjective
metadata:
  name: {{ .name }}
  namespace: {{ $.Release.Namespace }}
  labels:
    {{- include "gateway-api-inference-extension.labels" $ | nindent 4 }}
spec:
  priority: {{ .priority }}
  poolRef:
    group: {{ $group }}
    name: {{ $.Release.Name }}
{{- end }}
12 changes: 9 additions & 3 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,15 @@ inferencePool:
# This will be deprecated soon, once upstream GW providers support v1; for now it is intentionally kept simple.
targetPortNumber: 8000

# Optional: Set priority for this InferencePool via an InferenceObjective.
# When defined, an InferenceObjective resource will be created with this priority value for the given inferencepool.
# priority: 2
# Optional: Define multiple InferenceObjectives for this InferencePool.
# Each InferenceObjective associates a name and priority with this InferencePool.
# Users reference these objectives by name in their request headers.
# inferenceObjectives:
# - name: high-priority
# priority: 1
# - name: low-priority
# priority: 5
inferenceObjectives: []

# Options: ["gke", "istio", "none"]
provider:
Expand Down