Skip to content

Commit 1051709

Browse files
Update requester template to work with latest FMA
Co-authored-by: aavarghese <avarghese@us.ibm.com> Co-authored-by: manoelmarques <manoel.marques@ibm.com> Signed-off-by: manoelmarques <manoel.marques@ibm.com> Signed-off-by: aavarghese <avarghese@us.ibm.com>
1 parent 8bb5ff6 commit 1051709

17 files changed

+645
-147
lines changed

charts/llm-d-modelservice/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ type: application
1313
# This is the chart version. This version number should be incremented each time you make changes
1414
# to the chart and its templates, including the app version.
1515
# Versions are expected to follow Semantic Versioning (https://semver.org/)
16-
version: "v0.4.7"
16+
version: "v0.4.8"
1717
# This is the version number of the application being deployed. This version number should be
1818
# incremented each time you make changes to the application. Versions are not expected to
1919
# follow Semantic Versioning. They should reflect the version the application is using.

charts/llm-d-modelservice/templates/decode-requester-replicaset.yaml

Lines changed: 70 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,69 @@
11
{{- if and .Values.requester.enable (and .Values.decode.create (not .Values.multinode)) }}
2+
---
3+
apiVersion: fma.llm-d.ai/v1alpha1
4+
kind: LauncherConfig
5+
metadata:
6+
name: {{ .Values.requester.launcherConfig | required "requester.launcherConfig is required when requester is enabled" }}
7+
{{- with .Values.requester.launcherConfigAnnotations }}
8+
annotations:
9+
{{- toYaml . | nindent 4 }}
10+
{{- end }}
11+
spec:
12+
# Fall back to 3 only when the value is unset (null). The `default` function treats 0 as
# empty, so an explicit maxSleepingInstances of 0 would otherwise be rendered as 3.
maxSleepingInstances: {{ ternary 3 .Values.requester.launcherConfigSpec.maxSleepingInstances (kindIs "invalid" .Values.requester.launcherConfigSpec.maxSleepingInstances) }}
13+
podTemplate:
14+
{{- with .Values.requester.launcherConfigSpec.podTemplate.metadata }}
15+
metadata:
16+
{{- toYaml . | nindent 6 }}
17+
{{- end }}
18+
spec:
19+
{{- with .Values.requester.launcherConfigSpec.podTemplate.spec }}
20+
{{- toYaml . | nindent 6 }}
21+
{{- end }}
22+
---
23+
apiVersion: fma.llm-d.ai/v1alpha1
24+
kind: InferenceServerConfig
25+
metadata:
26+
# Fail rendering with a clear message when the name is unset, mirroring the LauncherConfig
# name check, and quote the result so numeric- or boolean-looking names stay strings.
name: {{ .Values.requester.inferenceServerConfig | required "requester.inferenceServerConfig is required when requester is enabled" | quote }}
27+
{{- with .Values.requester.inferenceServerConfigAnnotations }}
28+
annotations:
29+
{{- toYaml . | nindent 4 }}
30+
{{- end }}
31+
spec:
32+
launcherConfigName: {{ .Values.requester.launcherConfig }}
33+
modelServerConfig:
34+
{{- with .Values.requester.modelServerConfig.annotations }}
35+
annotations:
36+
{{- toYaml . | nindent 6 }}
37+
{{- end }}
38+
{{- $modelParts := split "/" .Values.modelArtifacts.name }}
39+
labels:
40+
model-reg: {{ index $modelParts._1 | quote }}
41+
model-repo: {{ index $modelParts._0 | quote }}
42+
{{- with .Values.requester.modelServerConfig.labels }}
43+
{{- toYaml . | nindent 6 }}
44+
{{- end }}
45+
{{- with .Values.requester.modelServerConfig.env_vars }}
46+
env_vars:
47+
{{- toYaml . | nindent 6 }}
48+
{{- end }}
49+
{{- if .Values.requester.modelServerConfig.options }}
50+
options: {{ .Values.requester.modelServerConfig.options | quote }}
51+
{{- end }}
52+
port: {{ .Values.requester.modelServerConfig.port | default 8005 }}
53+
{{- if or (.Values.decode.initContainers) (eq .Values.routing.proxy.enabled true) }}
54+
initContainers:
55+
{{- (include "llm-d-modelservice.routingProxy" (dict "proxy" .Values.routing.proxy "servicePort" .Values.routing.servicePort "Values" .Values)) | nindent 6 }}
56+
{{- if .Values.decode.initContainers }}
57+
{{- toYaml .Values.decode.initContainers | nindent 6 }}
58+
{{- end }}
59+
{{- end }}
60+
{{- with .Values.decode.containers }}
61+
containers:
62+
{{- range . }}
63+
{{- (include "llm-d-modelservice.container" (dict "role" "decode" "container" . "parallelism" $.Values.decode.parallelism "Values" $.Values "Release" $.Release "Chart" $.Chart "pdSpec" $.Values.decode)) | nindent 6 }}
64+
{{- end }}
65+
{{- end }}
66+
---
267
apiVersion: apps/v1
368
kind: ReplicaSet
469
metadata:
@@ -13,40 +78,18 @@ spec:
1378
labels:
1479
app: dp-app
1580
annotations:
16-
dual-pod.llm-d.ai/admin-port: "{{ .Values.requester.adminPort }}"
17-
dual-pod.llm-d.ai/server-patch: |
18-
metadata:
19-
labels: {
20-
{{- $modelParts := split "/" .Values.modelArtifacts.name -}}
21-
"model-reg": {{ index $modelParts._1 | quote }},
22-
"model-repo": {{ index $modelParts._0 | quote }},
23-
"app": null}
24-
spec:
25-
{{- if or (.Values.decode.initContainers) (eq .Values.routing.proxy.enabled true) }}
26-
initContainers:
27-
{{- (include "llm-d-modelservice.routingProxy" (dict "proxy" .Values.routing.proxy "servicePort" .Values.routing.servicePort "Values" .Values)) | nindent 12 }}
28-
{{- if .Values.decode.initContainers }}
29-
{{- toYaml .Values.decode.initContainers | nindent 12 }}
30-
{{- end }}
31-
{{- end }}
32-
{{- with .Values.decode.containers }}
33-
containers:
34-
{{- range . }}
35-
{{- (include "llm-d-modelservice.container" (dict "role" "decode" "container" . "parallelism" $.Values.decode.parallelism "Values" $.Values "Release" $.Release "Chart" $.Chart "pdSpec" $.Values.decode)) | nindent 14 }}
36-
{{- end }}
37-
{{- end }}
81+
dual-pods.llm-d.ai/admin-port: "{{ .Values.requester.adminPort }}"
82+
dual-pods.llm-d.ai/accelerators: {{ .Values.requester.accelerators | quote }}
83+
# Annotation values must be strings: quote the rendered name (as the sibling annotations do)
# so a numeric- or boolean-looking config name is not retyped by the YAML parser.
dual-pods.llm-d.ai/inference-server-config: {{ .Values.requester.inferenceServerConfig | required "requester.inferenceServerConfig is required" | quote }}
3884
spec:
3985
containers:
4086
- name: inference-server
4187
image: {{ .Values.requester.image }}
4288
imagePullPolicy: Always
4389
command:
4490
- /app/requester
45-
env:
46-
- name: PROBES_PORT
47-
value: {{ .Values.requester.port.probes | quote }}
48-
- name: SPI_PORT
49-
value: {{ .Values.requester.port.spi | quote }}
91+
- --logtostderr=false
92+
- --log_file=/tmp/requester.log
5093
ports:
5194
- name: probes
5295
containerPort: {{ .Values.requester.port.probes }}

charts/llm-d-modelservice/values.schema.json

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3589,6 +3589,12 @@
35893589
"additionalProperties": true,
35903590
"description": "Requester configuration part of the dual-pod solution for FMA",
35913591
"properties": {
3592+
"accelerators": {
3593+
"default": "GPU-0",
3594+
"required": [],
3595+
"title": "accelerators",
3596+
"type": "string"
3597+
},
35923598
"adminPort": {
35933599
"default": 8081,
35943600
"required": [],
@@ -3607,6 +3613,161 @@
36073613
"title": "image",
36083614
"type": "string"
36093615
},
3616+
"inferenceServerConfig": {
3617+
"default": "",
3618+
"description": "Name of the InferenceServerConfig resource (required when requester is enabled)",
3619+
"required": [],
3620+
"title": "inferenceServerConfig",
3621+
"type": "string"
3622+
},
3623+
"launcherConfig": {
3624+
"default": "",
3625+
"description": "Name of the LauncherConfig resource (required when requester is enabled)",
3626+
"required": [],
3627+
"title": "launcherConfig",
3628+
"type": "string"
3629+
},
3630+
"launcherConfigSpec": {
3631+
"additionalProperties": true,
3632+
"description": " additionalProperties: true @schema",
3633+
"properties": {
3634+
"maxSleepingInstances": {
3635+
"default": 3,
3636+
"required": [],
3637+
"title": "maxSleepingInstances",
3638+
"type": "integer"
3639+
},
3640+
"podTemplate": {
3641+
"additionalProperties": false,
3642+
"properties": {
3643+
"metadata": {
3644+
"additionalProperties": true,
3645+
"description": " additionalProperties: true @schema",
3646+
"required": [],
3647+
"title": "metadata"
3648+
},
3649+
"spec": {
3650+
"additionalProperties": false,
3651+
"properties": {
3652+
"containers": {
3653+
"items": {
3654+
"anyOf": [
3655+
{
3656+
"additionalProperties": false,
3657+
"properties": {
3658+
"args": {
3659+
"items": {
3660+
"anyOf": [
3661+
{
3662+
"required": [],
3663+
"type": "string"
3664+
}
3665+
],
3666+
"required": []
3667+
},
3668+
"required": [],
3669+
"title": "args",
3670+
"type": "array"
3671+
},
3672+
"command": {
3673+
"items": {
3674+
"anyOf": [
3675+
{
3676+
"required": [],
3677+
"type": "string"
3678+
},
3679+
{
3680+
"required": [],
3681+
"type": "string"
3682+
}
3683+
],
3684+
"required": []
3685+
},
3686+
"required": [],
3687+
"title": "command",
3688+
"type": "array"
3689+
},
3690+
"image": {
3691+
"default": "ghcr.io/llm-d-incubation/llm-d-fast-model-actuation/launcher:latest",
3692+
"required": [],
3693+
"title": "image",
3694+
"type": "string"
3695+
},
3696+
"imagePullPolicy": {
3697+
"default": "IfNotPresent",
3698+
"required": [],
3699+
"title": "imagePullPolicy",
3700+
"type": "string"
3701+
},
3702+
"name": {
3703+
"default": "inference-server",
3704+
"required": [],
3705+
"title": "name",
3706+
"type": "string"
3707+
}
3708+
},
3709+
"required": [],
3710+
"type": "object"
3711+
}
3712+
],
3713+
"required": []
3714+
},
3715+
"required": [],
3716+
"title": "containers",
3717+
"type": "array"
3718+
}
3719+
},
3720+
"required": [],
3721+
"title": "spec",
3722+
"type": "object"
3723+
}
3724+
},
3725+
"required": [],
3726+
"title": "podTemplate",
3727+
"type": "object"
3728+
}
3729+
},
3730+
"required": [],
3731+
"title": "launcherConfigSpec"
3732+
},
3733+
"modelServerConfig": {
3734+
"additionalProperties": true,
3735+
"description": " additionalProperties: true @schema",
3736+
"properties": {
3737+
"annotations": {
3738+
"additionalProperties": true,
3739+
"description": " additionalProperties: true @schema",
3740+
"required": [],
3741+
"title": "annotations"
3742+
},
3743+
"env_vars": {
3744+
"additionalProperties": true,
3745+
"description": " additionalProperties: true @schema",
3746+
"required": [],
3747+
"title": "env_vars"
3748+
},
3749+
"labels": {
3750+
"additionalProperties": true,
3751+
"description": " additionalProperties: true @schema",
3752+
"required": [],
3753+
"title": "labels"
3754+
},
3755+
"options": {
3756+
"default": "",
3757+
"required": [],
3758+
"title": "options",
3759+
"type": "string"
3760+
},
3761+
"port": {
3762+
"default": 8005,
3763+
"required": [],
3764+
"title": "port",
3765+
"type": "integer"
3766+
}
3767+
},
3768+
"required": [],
3769+
"title": "modelServerConfig"
3770+
},
36103771
"port": {
36113772
"additionalProperties": false,
36123773
"properties": {

0 commit comments

Comments
 (0)