{{- /*
Decode LeaderWorkerSet.

Rendered only when the resolved decode data-parallelism is greater than 1.
Each replica is a group of `size` pods (one leader plus workers). The leader
pod template carries the routing proxy output, the user-declared decode
containers, and the model-storage volume selected by modelArtifacts.prefix.
The per-container nvidia.com/gpu limit/request is always taken from the
resolved tensor parallelism; any user-supplied nvidia.com/gpu entry is dropped.
*/ -}}
{{- $parallelism := (include "llm-d-modelservice.parallelism" .Values.decode.parallelism) | fromYaml -}}
{{- if and $parallelism.data (gt (int $parallelism.data) 1) }}
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: {{ include "llm-d-modelservice.fullname" . }}-decode
  labels:
    {{- include "llm-d-modelservice.labels" . | nindent 4 }}
    {{- include "llm-d-modelservice.decodelabels" . | nindent 4 }}
spec:
  {{- /* leave replicas unset when autoscaling is enabled */}}
  {{- if not .Values.decode.autoscaling.enabled }}
  replicas: {{ default 1 .Values.decode.replicas }}
  {{- end }}
  leaderWorkerTemplate:
    {{- /* use the same resolved value the guard above was evaluated on */}}
    size: {{ int $parallelism.data }}
    leaderTemplate:
      metadata:
        labels:
          {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }}
      spec:
        {{- /* the routingProxy helper receives .Values.routing as its context */}}
        {{- with .Values.routing }}
        {{- (include "llm-d-modelservice.routingProxy" .) | nindent 8 }}
        {{- end }}
        {{- with .Values.decode.imagePullSecrets }}
        imagePullSecrets:
          {{- toYaml . | nindent 8 }}
        {{- end }}
        serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }}
        {{- with .Values.podSecurityContext }}
        securityContext:
          {{- /* map content must be indented deeper than its key */}}
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .Values.decode.acceleratorTypes }}
        {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 8 }}
        {{- end }}
        {{- with .Values.decode.containers }}
        containers:
          {{- range . }}
          - name: {{ default "vllm" .name }}
            image: {{ required "image of container is required" .image }}
            {{- with .securityContext }}
            securityContext:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- with .imagePullPolicy }}
            imagePullPolicy: {{ . }}
            {{- end }}
            {{- with .command }}
            command:
            {{- toYaml . | nindent 12 }}
            {{- end }}
            {{- with .args }}
            args:
            {{- toYaml . | nindent 12 }}
            {{- end }}
            {{- /* user-supplied env first, then the entries added for a mounted model volume */}}
            {{- if or .env .mountModelVolume }}
            env:
            {{- end }}
            {{- with .env }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
            {{- if .mountModelVolume }}
            - name: HF_HOME
              value: /model-cache
            {{- with $.Values.modelArtifacts.authSecretName }}
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: {{ . }}
                  key: HF_TOKEN
            {{- end }}
            {{- end }}
            {{- with .livenessProbe }}
            livenessProbe:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- with .readinessProbe }}
            readinessProbe:
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- /*
              Merge user resources with the GPU count derived from tensor parallelism.
              `with` skips the toYaml call when nothing is left after dropping
              nvidia.com/gpu; rendering an empty map ("{}") next to the GPU key
              would produce invalid YAML. limits and requests are guarded
              independently, so either may be absent.
            */}}
            {{- $res := default (dict) .resources }}
            resources:
              limits:
                {{- with omit (default (dict) $res.limits) "nvidia.com/gpu" }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
                nvidia.com/gpu: {{ $parallelism.tensor }}
              requests:
                {{- with omit (default (dict) $res.requests) "nvidia.com/gpu" }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
                nvidia.com/gpu: {{ $parallelism.tensor }}
            {{- /* user-supplied volume mounts first, then the model-storage mount */}}
            {{- if or .volumeMounts .mountModelVolume }}
            volumeMounts:
            {{- end }}
            {{- with .volumeMounts }}
              {{- toYaml . | nindent 14 }}
            {{- end }}
            {{- if .mountModelVolume }}
              - name: model-storage
                mountPath: /model-cache
            {{- end }}
            {{- with .workingDir }}
            workingDir: {{ . }}
            {{- end }}
            {{- with .stdin }}
            stdin: {{ . }}
            {{- end }}
            {{- with .tty }}
            tty: {{ . }}
            {{- end }}
          {{- end }}{{- /* range . */}}
        {{- end }}{{- /* with .Values.decode.containers */}}
        volumes:
        {{- with .Values.decode.volumes }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- /* model-storage volume source depends on modelArtifacts.prefix */}}
        {{- if eq .Values.modelArtifacts.prefix "hf" }}
        - name: model-storage
          emptyDir:
            sizeLimit: {{ default "0" .Values.modelArtifacts.size }}
        {{- else if eq .Values.modelArtifacts.prefix "pvc" }}
        - name: model-storage
          persistentVolumeClaim:
            claimName: {{ .Values.modelArtifacts.artifact }}
            readOnly: true
        {{- else if eq .Values.modelArtifacts.prefix "oci" }}
        - name: model-storage
          image:
            reference: {{ .Values.modelArtifacts.artifact }}
            {{- with .Values.modelArtifacts.imagePullPolicy }}
            pullPolicy: {{ . }}
            {{- end }}
        {{- end }}

    workerTemplate:
      metadata:
        labels:
          {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }}
      spec:
        {{- with .Values.decode.acceleratorTypes }}
        {{- (include "llm-d-modelservice.acceleratorTypes" .) | nindent 8 }}
        {{- end }}
        serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }}
        {{- /* NOTE(review): no containers are rendered for worker pods in this template; a pod template normally requires at least one container. Confirm whether worker containers are still to be added. */}}
{{- end }}{{- /* if and $parallelism.data (gt $parallelism.data 1) */}}
0 commit comments