Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/llm-d-modelservice/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: "v0.4.8"
version: "v0.4.9"
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,61 @@
{{- if and .Values.requester.enable (and .Values.decode.create (not .Values.multinode)) }}
---
{{- /*
LauncherConfig resource for the requester.
Its pod template is assembled from .Values.requester.launcherConfigSpec.podTemplate:
  - every pod-spec field except "containers" and "volumes" is copied through as-is;
  - each container is rendered through the shared "llm-d-modelservice.container" helper with role "launcher";
  - user-supplied volumes are followed by the output of the "llm-d-modelservice.mountModelVolumeVolumes" helper.
*/}}
apiVersion: fma.llm-d.ai/v1alpha1
kind: LauncherConfig
metadata:
  name: {{ .Values.requester.launcherConfig | required "requester.launcherConfig is required when requester is enabled" | quote }}
spec:
  {{- $launcherSpec := .Values.requester.launcherConfigSpec }}
  {{- /* Key-presence check rather than `default`: `default` treats 0 as empty and would silently turn an explicit 0 into 3. */}}
  maxSleepingInstances: {{ hasKey $launcherSpec "maxSleepingInstances" | ternary $launcherSpec.maxSleepingInstances 3 }}
  podTemplate:
    {{- with $launcherSpec.podTemplate.metadata }}
    metadata:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    spec:
      {{- /* containers and volumes are rendered separately below, so strip them from the pass-through spec. */}}
      {{- $podSpec := omit $launcherSpec.podTemplate.spec "containers" "volumes" }}
      {{- with $podSpec }}
      {{- toYaml . | nindent 6 }}
      {{- end }}
      {{- with $launcherSpec.podTemplate.spec.containers }}
      containers:
        {{- range . }}
        {{- (include "llm-d-modelservice.container" (dict "role" "launcher" "container" . "parallelism" $.Values.decode.parallelism "Values" $.Values "Release" $.Release "Chart" $.Chart "pdSpec" $.Values.requester.launcherConfigSpec)) | nindent 8 }}
        {{- end }}
      {{- end }}
      {{- /* Emit the volumes key only when at least one source can contribute entries. */}}
      {{- if or $launcherSpec.podTemplate.spec.volumes .Values.modelArtifacts }}
      volumes:
        {{- with $launcherSpec.podTemplate.spec.volumes }}
        {{- toYaml . | nindent 6 }}
        {{- end }}
        {{- if .Values.modelArtifacts }}
        {{- include "llm-d-modelservice.mountModelVolumeVolumes" .Values.modelArtifacts | nindent 6 }}
        {{- end }}
      {{- end }}
---
{{- /*
InferenceServerConfig resource for the requester.
It points at the LauncherConfig by name (spec.launcherConfigName) and carries the
model-server settings from .Values.requester.modelServerConfig. Optional maps
(annotations, labels, env_vars) and the options string are emitted only when non-empty.
*/}}
apiVersion: fma.llm-d.ai/v1alpha1
kind: InferenceServerConfig
metadata:
  {{- /* Fail at render time with a clear message instead of emitting an empty (null) name. */}}
  name: {{ .Values.requester.inferenceServerConfig | required "requester.inferenceServerConfig is required when requester is enabled" | quote }}
spec:
  launcherConfigName: {{ .Values.requester.launcherConfig | required "requester.launcherConfig is required when requester is enabled" | quote }}
  modelServerConfig:
    {{- with .Values.requester.modelServerConfig.annotations }}
    annotations:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .Values.requester.modelServerConfig.labels }}
    labels:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .Values.requester.modelServerConfig.env_vars }}
    env_vars:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- if .Values.requester.modelServerConfig.options }}
    options: {{ .Values.requester.modelServerConfig.options | quote }}
    {{- end }}
    port: {{ .Values.requester.modelServerConfig.port | default 8005 }}
---
apiVersion: apps/v1
kind: ReplicaSet
metadata:
Expand All @@ -13,40 +70,17 @@ spec:
labels:
app: dp-app
annotations:
dual-pod.llm-d.ai/admin-port: "{{ .Values.requester.adminPort }}"
dual-pod.llm-d.ai/server-patch: |
metadata:
labels: {
{{- $modelParts := split "/" .Values.modelArtifacts.name -}}
"model-reg": {{ index $modelParts._1 | quote }},
"model-repo": {{ index $modelParts._0 | quote }},
"app": null}
spec:
{{- if or (.Values.decode.initContainers) (eq .Values.routing.proxy.enabled true) }}
initContainers:
{{- (include "llm-d-modelservice.routingProxy" (dict "proxy" .Values.routing.proxy "servicePort" .Values.routing.servicePort "Values" .Values)) | nindent 12 }}
{{- if .Values.decode.initContainers }}
{{- toYaml .Values.decode.initContainers | nindent 12 }}
{{- end }}
{{- end }}
{{- with .Values.decode.containers }}
containers:
{{- range . }}
{{- (include "llm-d-modelservice.container" (dict "role" "decode" "container" . "parallelism" $.Values.decode.parallelism "Values" $.Values "Release" $.Release "Chart" $.Chart "pdSpec" $.Values.decode)) | nindent 14 }}
{{- end }}
{{- end }}
dual-pods.llm-d.ai/admin-port: "{{ .Values.requester.adminPort }}"
dual-pods.llm-d.ai/inference-server-config: {{ .Values.requester.inferenceServerConfig | required "requester.inferenceServerConfig is required" }}
spec:
containers:
- name: inference-server
image: {{ .Values.requester.image }}
imagePullPolicy: Always
command:
- /app/requester
env:
- name: PROBES_PORT
value: {{ .Values.requester.port.probes | quote }}
- name: SPI_PORT
value: {{ .Values.requester.port.spi | quote }}
- --logtostderr=false
- --log_file=/tmp/requester.log
ports:
- name: probes
containerPort: {{ .Values.requester.port.probes }}
Expand Down
92 changes: 92 additions & 0 deletions charts/llm-d-modelservice/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3521,6 +3521,98 @@
"title": "image",
"type": "string"
},
"inferenceServerConfig": {
"default": "",
"description": "Name of the InferenceServerConfig resource (required when requester is enabled)",
"title": "inferenceServerConfig",
"type": "string"
},
"launcherConfig": {
"default": "",
"description": "Name of the LauncherConfig resource (required when requester is enabled)",
"title": "launcherConfig",
"type": "string"
},
"launcherConfigSpec": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"properties": {
"maxSleepingInstances": {
"default": 3,
"title": "maxSleepingInstances",
"type": "integer"
},
"podTemplate": {
"additionalProperties": false,
"properties": {
"metadata": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "metadata"
},
"spec": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"properties": {
"containers": {
"description": " items: additionalProperties: true @schema",
"items": {
"additionalProperties": true,
"required": []
},
"required": [],
"title": "containers"
}
},
"required": [],
"title": "spec"
}
},
"required": [],
"title": "podTemplate",
"type": "object"
}
},
"required": [],
"title": "launcherConfigSpec"
},
"modelServerConfig": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"properties": {
"annotations": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "annotations"
},
"env_vars": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "env_vars"
},
"labels": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "labels"
},
"options": {
"default": "",
"title": "options",
"type": "string"
},
"port": {
"default": 8005,
"title": "port",
"type": "integer"
}
},
"required": [],
"title": "modelServerConfig"
},
"port": {
"additionalProperties": false,
"properties": {
Expand Down
92 changes: 92 additions & 0 deletions charts/llm-d-modelservice/values.schema.tmpl.json
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,98 @@
"title": "image",
"type": "string"
},
"inferenceServerConfig": {
"default": "",
"description": "Name of the InferenceServerConfig resource (required when requester is enabled)",
"title": "inferenceServerConfig",
"type": "string"
},
"launcherConfig": {
"default": "",
"description": "Name of the LauncherConfig resource (required when requester is enabled)",
"title": "launcherConfig",
"type": "string"
},
"launcherConfigSpec": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"properties": {
"maxSleepingInstances": {
"default": 3,
"title": "maxSleepingInstances",
"type": "integer"
},
"podTemplate": {
"additionalProperties": false,
"properties": {
"metadata": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "metadata"
},
"spec": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"properties": {
"containers": {
"description": " items: additionalProperties: true @schema",
"items": {
"additionalProperties": true,
"required": []
},
"required": [],
"title": "containers"
}
},
"required": [],
"title": "spec"
}
},
"required": [],
"title": "podTemplate",
"type": "object"
}
},
"required": [],
"title": "launcherConfigSpec"
},
"modelServerConfig": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"properties": {
"annotations": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "annotations"
},
"env_vars": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "env_vars"
},
"labels": {
"additionalProperties": true,
"description": " additionalProperties: true @schema",
"required": [],
"title": "labels"
},
"options": {
"default": "",
"title": "options",
"type": "string"
},
"port": {
"default": 8005,
"title": "port",
"type": "integer"
}
},
"required": [],
"title": "modelServerConfig"
},
"port": {
"additionalProperties": false,
"properties": {
Expand Down
53 changes: 53 additions & 0 deletions charts/llm-d-modelservice/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,18 +159,71 @@ requester:
enable: false
image: "ghcr.io/llm-d-incubation/llm-d-fast-model-actuation/requester:latest"
adminPort: 8081
# Name of the InferenceServerConfig resource (required when requester is enabled)
inferenceServerConfig: ""
# Name of the LauncherConfig resource (required when requester is enabled)
launcherConfig: ""
port:
probes: 8080
spi: 8081
readinessProbe:
initialDelaySeconds: 2
periodSeconds: 5

resources:
limits:
gpus: 1
cpus: 1
memory: 250Mi

# @schema
# additionalProperties: true
# @schema
launcherConfigSpec:
maxSleepingInstances: 3
podTemplate:
# @schema
# additionalProperties: true
# @schema
metadata: {}
# @schema
# additionalProperties: true
# @schema
spec:
# @schema
# items:
# additionalProperties: true
# @schema
containers:
- name: inference-server
image: "ghcr.io/llm-d-incubation/llm-d-fast-model-actuation/launcher:latest"
imagePullPolicy: IfNotPresent
command:
- /bin/bash
- -c
args:
- |
uvicorn launcher:app --host 0.0.0.0 --log-level info --port 8001

# @schema
# additionalProperties: true
# @schema
modelServerConfig:
# @schema
# additionalProperties: true
# @schema
annotations: {}
# @schema
# additionalProperties: true
# @schema
labels: {}
# @schema
# additionalProperties: true
# @schema
env_vars: {}
options: ""
port: 8005

# Describe routing requirements. In addition to service level routing (OpenAI model name, service port)
# also describes elements for Gateway API Inference Extension configuration
routing:
Expand Down
Loading
Loading