Skip to content

Commit ef8c546

Browse files
Merge pull request #1480 from porter-dev/sms/hf-speedrun
hf llm additional values
2 parents (39edbf2 + bbf1f29), commit ef8c546

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

addons/hf-llm-models/templates/deployment.yaml

+13
Original file line number | Diff line number | Diff line change
@@ -69,12 +69,19 @@ spec:
6969
- --tensor-parallel-size={{ .Values.tensorParallelSize }}
7070
{{- if .Values.maxModelLen }}
7171
- --max-model-len={{ .Values.maxModelLen }}
72+
{{- end }}
73+
{{- if .Values.extraArgs }}
74+
{{- range .Values.extraArgs }}
75+
- {{ . }}
76+
{{- end }}
7277
{{- end }}
7378
image: {{ .Values.vllmImage }}
7479
imagePullPolicy: IfNotPresent
7580
env:
7681
- name: HF_TOKEN
7782
value: {{ .Values.huggingFaceToken }}
83+
- name: NCCL_DEBUG
84+
value: INFO
7885
ports:
7986
- containerPort: 8000
8087
protocol: TCP
@@ -105,13 +112,19 @@ spec:
105112
nvidia.com/gpu: {{ .Values.resources.limits.nvidiaGpu }}
106113
{{- end }}
107114
volumeMounts:
115+
- name: dshm
116+
mountPath: /dev/shm
108117
- name: model-volume
109118
mountPath: {{ .Values.modelDir }}
110119
name: vllm
111120
securityContext:
112121
allowPrivilegeEscalation: false
113122
terminationGracePeriodSeconds: 10
114123
volumes:
124+
- name: dshm
125+
emptyDir:
126+
medium: Memory
127+
sizeLimit: 4Gi
115128
- name: model-volume
116129
persistentVolumeClaim:
117130
claimName: "{{ .Release.Name }}-hf-llm"

0 commit comments

Comments
 (0)