Skip to content
This repository was archived by the owner on Jul 24, 2025. It is now read-only.

Commit a57e681

Browse files
authored
Merge pull request #4 from jgchn/mk-helm-3
lws
2 parents 23e67f6 + 10099c1 commit a57e681

File tree

4 files changed

+343
-34
lines changed

4 files changed

+343
-34
lines changed

helm/templates/_helpers.tpl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,17 @@ initContainers:
111111
runAsNonRoot: true
112112
{{- end }}
113113

114+
{{- /*
Resolve the parallelism settings of a workload.
Context: a parallelism map (may be nil/empty) with optional "tensor" and "data" keys.
Output: a YAML mapping holding "tensor" and "data"; each falls back to 1 when
the key is missing or empty. Callers in this chart parse the result with fromYaml.
*/ -}}
{{- define "llm-d-modelservice.parallelism" -}}
{{- $tensor := 1 -}}
{{- $data := 1 -}}
{{- with . -}}
  {{- if .tensor -}}
    {{- $tensor = .tensor -}}
  {{- end -}}
  {{- if .data -}}
    {{- $data = .data -}}
  {{- end -}}
{{- end -}}
{{- dict "tensor" $tensor "data" $data | toYaml | nindent 0 }}
{{- end }}
124+
114125
{{/* P/D service account name */}}
115126
{{- define "llm-d-modelservice.pdServiceAccountName" -}}
116127
{{ include "llm-d-modelservice.sanitizedModelName" . }}-sa

helm/templates/decode-deployment.yaml

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{{- $parallelism := (include "llm-d-modelservice.parallelism" .Values.decode.parallelism) | fromYaml -}}
12
{{- if .Values.decode }}
23
apiVersion: apps/v1
34
kind: Deployment
@@ -31,8 +32,8 @@ spec:
3132
{{- with .Values.decode.containers }}
3233
containers:
3334
{{- range . }}
34-
- name: {{ .name }}
35-
image: {{ .image }}
35+
- name: {{ default "vllm" .name }}
36+
image: {{ required "image of container is required" .image }}
3637
{{- with .securityContext }}
3738
securityContext:
3839
{{- toYaml . | nindent 12 }}
@@ -75,17 +76,24 @@ spec:
7576
readinessProbe:
7677
{{- toYaml . | nindent 12 }}
7778
{{- end }}
78-
{{- with .resources }}
7979
resources:
80-
{{- toYaml . | nindent 12 }}
81-
{{- end }}
80+
limits:
  {{- /*
  User-supplied limits minus the GPU entry; the GPU count is always derived
  from tensor parallelism. `with` skips the block when nothing is left after
  `omit`, so an empty map is never rendered as `{}` above the GPU key.
  */}}
  {{- with omit (default (dict) (default (dict) .resources).limits) "nvidia.com/gpu" }}
  {{- toYaml . | nindent 14 }}
  {{- end }}
  nvidia.com/gpu: {{ $parallelism.tensor }}
requests:
  {{- /* Guarded on requests themselves (previously guarded on limits). */}}
  {{- with omit (default (dict) (default (dict) .resources).requests) "nvidia.com/gpu" }}
  {{- toYaml . | nindent 14 }}
  {{- end }}
  nvidia.com/gpu: {{ $parallelism.tensor }}
8290
{{- /* volumeMount */}}
8391
{{- if or .volumeMounts .mountModelVolume }}
8492
volumeMounts:
8593
{{- end -}}
8694
{{- /* user supplied volume mount in values */}}
8795
{{- with .volumeMounts }}
88-
{{- toYaml . | nindent 14 }}
96+
{{- toYaml . | nindent 12 }}
8997
{{- end }}
9098
{{- /* what we add if mountModelVolume is true */}}
9199
{{- if .mountModelVolume }}

helm/templates/decode-lws.yaml

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
{{- /*
LeaderWorkerSet for the decode workload.
Rendered only when the resolved data parallelism is greater than 1.
The GPU limit/request of every container is taken from the resolved tensor
parallelism; any user-supplied "nvidia.com/gpu" entry is dropped.
*/ -}}
{{- /* Fall back to an empty map so a missing .Values.decode does not abort rendering. */ -}}
{{- $decode := default (dict) .Values.decode -}}
{{- $parallelism := (include "llm-d-modelservice.parallelism" $decode.parallelism) | fromYaml -}}
{{- if and $parallelism.data (gt (int $parallelism.data) 1) }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ include "llm-d-modelservice.fullname" . }}-decode
  labels:
    {{- include "llm-d-modelservice.labels" . | nindent 4 }}
    {{- include "llm-d-modelservice.decodelabels" . | nindent 4 }}
spec:
  {{- /* replicas is omitted when autoscaling is enabled; an unset autoscaling map counts as disabled */}}
  {{- if not (default (dict) $decode.autoscaling).enabled }}
  replicas: {{ default 1 $decode.replicas }}
  {{- end }}
  leaderWorkerTemplate:
    {{- /* use the resolved value, the same one the guard above checks */}}
    size: {{ int $parallelism.data }}
    leaderTemplate:
      metadata:
        labels:
          {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }}
      spec:
        {{- with .Values.routing }}
        {{- include "llm-d-modelservice.routingProxy" . | nindent 8 }}
        {{- end }}
        {{- with $decode.imagePullSecrets }}
        imagePullSecrets:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }}
        {{- with .Values.podSecurityContext }}
        securityContext:
          {{- /* nested one level below the key (was rendered at the key's own indent) */}}
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with $decode.acceleratorTypes }}
        {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 8 }}
        {{- end }}
        {{- with $decode.containers }}
        containers:
          {{- range . }}
          - name: {{ default "vllm" .name }}
            image: {{ required "image of container is required" .image }}
            {{- with .securityContext }}
            securityContext:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- with .imagePullPolicy }}
            imagePullPolicy: {{ . }}
            {{- end }}
            {{- with .command }}
            command:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- with .args }}
            args:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- /* insert user's env for this container */}}
            {{- if or .env .mountModelVolume }}
            env:
            {{- end }}
            {{- with .env }}
              {{- /* same indent as the literal entries below so both land in one list */}}
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- /* insert envs needed when the model volume is mounted */}}
            {{- if .mountModelVolume }}
              - name: HF_HOME
                value: /model-cache
              {{- with $.Values.modelArtifacts.authSecretName }}
              - name: HF_TOKEN
                valueFrom:
                  secretKeyRef:
                    name: {{ . }}
                    key: HF_TOKEN
              {{- end }}
            {{- end }}
            {{- with .livenessProbe }}
            livenessProbe:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- with .readinessProbe }}
            readinessProbe:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            resources:
              limits:
                {{- /* `with` skips the block when nothing is left after omit, so `{}` is never emitted */}}
                {{- with omit (default (dict) (default (dict) .resources).limits) "nvidia.com/gpu" }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
                nvidia.com/gpu: {{ $parallelism.tensor }}
              requests:
                {{- /* guarded on requests themselves (previously guarded on limits) */}}
                {{- with omit (default (dict) (default (dict) .resources).requests) "nvidia.com/gpu" }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
                nvidia.com/gpu: {{ $parallelism.tensor }}
            {{- /* volumeMount */}}
            {{- if or .volumeMounts .mountModelVolume }}
            volumeMounts:
            {{- end }}
            {{- /* user supplied volume mount in values */}}
            {{- with .volumeMounts }}
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- /* what we add if mountModelVolume is true */}}
            {{- if .mountModelVolume }}
              - name: model-storage
                mountPath: /model-cache
            {{- end }}
            {{- with .workingDir }}
            workingDir: {{ . }}
            {{- end }}
            {{- with .stdin }}
            stdin: {{ . }}
            {{- end }}
            {{- with .tty }}
            tty: {{ . }}
            {{- end }}
          {{- end }}{{- /* range . */}}
        {{- end }}{{- /* with decode containers */}}
        volumes:
          {{- with $decode.volumes }}
          {{- toYaml . | nindent 10 }}
          {{- end }}
          {{- /* model-storage volume, chosen by the modelArtifacts prefix */}}
          {{- if eq .Values.modelArtifacts.prefix "hf" }}
          - name: model-storage
            emptyDir:
              sizeLimit: {{ default "0" .Values.modelArtifacts.size }}
          {{- else if eq .Values.modelArtifacts.prefix "pvc" }}
          - name: model-storage
            persistentVolumeClaim:
              claimName: {{ .Values.modelArtifacts.artifact }}
              readOnly: true
          {{- else if eq .Values.modelArtifacts.prefix "oci" }}
          - name: model-storage
            image:
              reference: {{ .Values.modelArtifacts.artifact }}
              {{- with .Values.modelArtifacts.imagePullPolicy }}
              pullPolicy: {{ . }}
              {{- end }}
          {{- end }}
    workerTemplate:
      metadata:
        labels:
          {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }}
      spec:
        {{- /* NOTE(review): the worker pod spec defines no containers yet — confirm this is intentional work in progress */}}
        {{- with $decode.acceleratorTypes }}
        {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 8 }}
        {{- end }}
        serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }}
{{- end }}{{- /* if and $parallelism.data (gt $parallelism.data 1) */}}

0 commit comments

Comments
 (0)