Merge pull request #4 from jgchn/mk-helm-3

jgchn · web-flow · commit a57e681d69cb · 2025-06-16T10:00:24.000-04:00
lws
diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl
@@ -111,6 +111,17 @@ initContainers:
     runAsNonRoot: true
 {{- end }}
 
+{{/*
+Effective parallelism settings, rendered as a YAML mapping with keys "tensor" and "data".
+Context: the parallelism values map (may be nil or empty); each key falls back to 1 when unset.
+*/}}
+{{- define "llm-d-modelservice.parallelism" -}}
+{{- $spec := default (dict) . -}}
+{{- $tensor := default 1 $spec.tensor -}}
+{{- $data := default 1 $spec.data -}}
+{{- dict "tensor" $tensor "data" $data | toYaml | nindent 0 }}
+{{- end }}
+
 {{/* P/D service account name */}}
 {{- define "llm-d-modelservice.pdServiceAccountName" -}}
 {{ include "llm-d-modelservice.sanitizedModelName" . }}-sa
diff --git a/helm/templates/decode-deployment.yaml b/helm/templates/decode-deployment.yaml
@@ -1,3 +1,4 @@
+{{- $parallelism := (include "llm-d-modelservice.parallelism" (default (dict) .Values.decode).parallelism) | fromYaml -}}{{- /* default (dict): the lookup must not fail when .Values.decode is unset, since the guard below handles that case */ -}}
 {{- if .Values.decode }}
 apiVersion: apps/v1
 kind: Deployment
@@ -31,8 +32,8 @@ spec:
       {{- with .Values.decode.containers }}
       containers:
       {{- range . }}
-        - name: {{ .name }}
-          image: {{ .image }}
+        - name: {{ default "vllm" .name }}
+          image: {{ required "image of container is required" .image }}
           {{- with .securityContext }}
           securityContext:
             {{- toYaml . | nindent 12 }}
@@ -75,17 +76,24 @@ spec:
           readinessProbe:
             {{- toYaml . | nindent 12 }}
           {{- end }}
-          {{- with .resources }}
           resources:
-            {{- toYaml . | nindent 12 }}
-          {{- end }}
+            limits:
+              {{- with omit (default (dict) (default (dict) .resources).limits) "nvidia.com/gpu" }}
+              {{- toYaml . | nindent 14 }}
+              {{- end }} {{- /* user limits minus the GPU key; skipped when empty so no "{}" is rendered */}}
+              nvidia.com/gpu: {{ $parallelism.tensor }}
+            requests:
+              {{- with omit (default (dict) (default (dict) .resources).requests) "nvidia.com/gpu" }}
+              {{- toYaml . | nindent 14 }}
+              {{- end }} {{- /* user requests minus the GPU key; rendered whenever requests are set */}}
+              nvidia.com/gpu: {{ $parallelism.tensor }}
           {{- /* volumeMount */}}
           {{- if or .volumeMounts .mountModelVolume }}
           volumeMounts:
           {{- end -}}
           {{- /* user supplied volume mount in values */}}
           {{- with .volumeMounts }}
-            {{- toYaml . | nindent 14 }}
+            {{- toYaml . | nindent 12 }}
           {{- end }}
           {{- /* what we add if mounModelVolume is true */}}
           {{- if .mountModelVolume }}
diff --git a/helm/templates/decode-lws.yaml b/helm/templates/decode-lws.yaml
@@ -0,0 +1,160 @@
+{{- /*
+Decode LeaderWorkerSet: rendered only when the effective data parallelism is greater than 1.
+$parallelism comes from the "llm-d-modelservice.parallelism" helper; its "tensor" value sets the
+per-container nvidia.com/gpu limit and request, and its "data" value sets the group size.
+`default (dict)` keeps the lookup from failing when .Values.decode is not set.
+*/ -}}
+{{- $parallelism := (include "llm-d-modelservice.parallelism" (default (dict) .Values.decode).parallelism) | fromYaml -}}
+{{- if and $parallelism.data (gt (int $parallelism.data) 1) }}
+apiVersion: leaderworkerset.x-k8s.io/v1
+kind: LeaderWorkerSet
+metadata:
+  name: {{ include "llm-d-modelservice.fullname" . }}-decode
+  labels:
+    {{- include "llm-d-modelservice.labels" . | nindent 4 }}
+    {{- include "llm-d-modelservice.decodelabels" . | nindent 4 }}
+spec:
+  {{- if not .Values.decode.autoscaling.enabled }}
+  replicas: {{ default 1 .Values.decode.replicas }}
+  {{- end }}
+  leaderWorkerTemplate:
+    {{- /* group size uses the same effective value that gated this manifest */}}
+    size: {{ int $parallelism.data }}
+    leaderTemplate:
+      metadata:
+        labels:
+          {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }}
+      spec:
+        {{- with .Values.routing }}
+        {{- include "llm-d-modelservice.routingProxy" . | nindent 8 }}
+        {{- end }}
+        {{- with .Values.decode.imagePullSecrets }}
+        imagePullSecrets:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }}
+        {{- /* nindent 10 nests the mapping under securityContext rather than beside it */}}
+        {{- with .Values.podSecurityContext }}
+        securityContext:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- with .Values.decode.acceleratorTypes }}
+        {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 8 }}
+        {{- end }}
+        {{- with .Values.decode.containers }}
+        containers:
+        {{- range . }}
+          - name: {{ default "vllm" .name }}
+            image: {{ required "image of container is required" .image }}
+            {{- with .securityContext }}
+            securityContext:
+              {{- toYaml . | nindent 14 }}
+            {{- end }}
+            {{- with .imagePullPolicy }}
+            imagePullPolicy: {{ . }}
+            {{- end }}
+            {{- with .command }}
+            command:
+              {{- toYaml . | nindent 14 }}
+            {{- end }}
+            {{- with .args }}
+            args:
+              {{- toYaml . | nindent 14 }}
+            {{- end }}
+            {{- /* env: user-supplied entries first, then the model-cache variables when mountModelVolume is set */}}
+            {{- if or .env .mountModelVolume }}
+            env:
+            {{- end }}
+            {{- with .env }}
+              {{- toYaml . | nindent 14 }}
+            {{- end }}
+            {{- if .mountModelVolume }}
+              - name: HF_HOME
+                value: /model-cache
+              {{- with $.Values.modelArtifacts.authSecretName }}
+              - name: HF_TOKEN
+                valueFrom:
+                  secretKeyRef:
+                    name: {{ . }}
+                    key: HF_TOKEN
+              {{- end }}
+            {{- end }}
+            {{- with .livenessProbe }}
+            livenessProbe:
+              {{- toYaml . | nindent 14 }}
+            {{- end }}
+            {{- with .readinessProbe }}
+            readinessProbe:
+              {{- toYaml . | nindent 14 }}
+            {{- end }}
+            {{- /*
+              The GPU count always comes from tensor parallelism; a user-supplied
+              nvidia.com/gpu entry is dropped. `with` skips an empty remainder so
+              no stray "{}" is rendered ahead of the GPU key.
+            */}}
+            resources:
+              limits:
+                {{- with omit (default (dict) (default (dict) .resources).limits) "nvidia.com/gpu" }}
+                {{- toYaml . | nindent 16 }}
+                {{- end }}
+                nvidia.com/gpu: {{ $parallelism.tensor }}
+              requests:
+                {{- with omit (default (dict) (default (dict) .resources).requests) "nvidia.com/gpu" }}
+                {{- toYaml . | nindent 16 }}
+                {{- end }}
+                nvidia.com/gpu: {{ $parallelism.tensor }}
+            {{- /* volumeMounts: user-supplied mounts first, then the model-storage mount when mountModelVolume is set */}}
+            {{- if or .volumeMounts .mountModelVolume }}
+            volumeMounts:
+            {{- end }}
+            {{- with .volumeMounts }}
+              {{- toYaml . | nindent 14 }}
+            {{- end }}
+            {{- if .mountModelVolume }}
+              - name: model-storage
+                mountPath: /model-cache
+            {{- end }}
+            {{- with .workingDir }}
+            workingDir: {{ . }}
+            {{- end }}
+            {{- with .stdin }}
+            stdin: {{ . }}
+            {{- end }}
+            {{- with .tty }}
+            tty: {{ . }}
+            {{- end }}
+        {{- end }} {{- /* range . */}}
+        {{- end }} {{- /* with .Values.decode.containers */}}
+        {{- /* volumes are part of the pod spec, so they sit at the same depth as containers */}}
+        volumes:
+        {{- with .Values.decode.volumes }}
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- if eq .Values.modelArtifacts.prefix "hf" }}
+          - name: model-storage
+            emptyDir:
+              sizeLimit: {{ default "0" .Values.modelArtifacts.size }}
+        {{- else if eq .Values.modelArtifacts.prefix "pvc" }}
+          - name: model-storage
+            persistentVolumeClaim:
+              claimName: {{ .Values.modelArtifacts.artifact }}
+              readOnly: true
+        {{- else if eq .Values.modelArtifacts.prefix "oci" }}
+          - name: model-storage
+            image:
+              reference: {{ .Values.modelArtifacts.artifact }}
+              {{- with .Values.modelArtifacts.imagePullPolicy }}
+              pullPolicy: {{ . }}
+              {{- end }}
+        {{- end }}
+    workerTemplate:
+      metadata:
+        labels:
+          {{- include "llm-d-modelservice.decodelabels" . | nindent 10 }}
+      spec:
+        {{- /* NOTE(review): no containers are rendered for workers in this template; confirm this is intended */}}
+        {{- with .Values.decode.acceleratorTypes }}
+        {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 8 }}
+        {{- end }}
+        serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }}
+{{- end }} {{- /* if and $parallelism.data (gt (int $parallelism.data) 1) */}}
diff --git a/helm/values-msvc-mk.yaml b/helm/values-msvc-mk.yaml