Skip to content
This repository was archived by the owner on Jul 24, 2025. It is now read-only.

Commit 81c048c

Browse files
committed
Address inconsistencies
Signed-off-by: Jing Chen <[email protected]>
1 parent 3126aea commit 81c048c

File tree

6 files changed

+67
-69
lines changed

6 files changed

+67
-69
lines changed

helm/templates/_helpers.tpl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,12 @@ initContainers:
123123

124124
{{/* P/D service account name */}}
125125
{{- define "llm-d-modelservice.pdServiceAccountName" -}}
126-
{{ include "llm-d-modelservice.sanitizedModelName" . }}-sa
126+
{{ include "llm-d-modelservice.fullname" . }}-sa
127127
{{- end }}
128128

129129
{{/* EPP service account name */}}
130130
{{- define "llm-d-modelservice.eppServiceAccountName" -}}
131-
{{ include "llm-d-modelservice.sanitizedModelName" . }}-epp-sa
131+
{{ include "llm-d-modelservice.fullname" . }}-epp-sa
132132
{{- end }}
133133

134134
{{/*

helm/templates/decode-deployment.yaml

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,14 @@ spec:
2828
{{- with .Values.decode.acceleratorTypes }}
2929
{{- include "llm-d-modelservice.acceleratorTypes" . | nindent 6 }}
3030
{{- end }}
31-
{{- /* initContainers */}}
31+
{{- /* Sidecar init container */}}
32+
{{- with .Values.routing }}
33+
{{ (include "llm-d-modelservice.routingProxy" .) | nindent 6 }}
34+
{{- end }}
35+
{{- /* User's other init containers */}}
3236
{{- with .Values.decode.initContainers }}
33-
initContainers:
34-
{{- toYaml . | nindent 6 }}
35-
{{- end }}
37+
{{- toYaml . | nindent 8 }}
38+
{{- end }}
3639
{{- /* range $.Values.decode.containers */}}
3740
{{- with .Values.decode.containers }}
3841
containers:
@@ -81,19 +84,25 @@ spec:
8184
readinessProbe:
8285
{{- toYaml . | nindent 10 }}
8386
{{- end }}
84-
{{- with .resources }}
8587
resources:
8688
limits:
87-
{{- if .limits -}}
88-
{{- omit .limits "nvidia.com/gpu" | toYaml | nindent 12 }}
89+
{{- $limits := dict -}}
90+
{{- if and .resources .resources.limits -}}
91+
{{- $limits = omit .resources.limits "nvidia.com/gpu" }}
92+
{{- if gt (len $limits) 0 }}
93+
{{- toYaml $limits | nindent 12 }}
94+
{{- end }}
8995
{{- end }}
90-
{{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}}
96+
nvidia.com/gpu: {{ $parallelism.tensor }}
9197
requests:
92-
{{- if .limits -}}
93-
{{- omit .requests "nvidia.com/gpu" | toYaml | nindent 12 }}
98+
{{- $requests := dict -}}
99+
{{- if and .resources .resources.requests -}}
100+
{{- $requests = omit .resources.requests "nvidia.com/gpu" }}
94101
{{- end }}
95-
{{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}}
96-
{{- end }}
102+
{{- if gt (len $requests) 0 }}
103+
{{- toYaml $requests | nindent 12 }}
104+
{{- end }}
105+
nvidia.com/gpu: {{ $parallelism.tensor }}
97106
{{- /* volumeMount */}}
98107
{{- include "llm-d-modelservice.mountModelVolumeVolumeMounts" . | nindent 8 }}
99108
{{- end }}

helm/templates/examples/output-facebook.yaml

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
apiVersion: v1
44
kind: ServiceAccount
55
metadata:
6-
name: facebook-epp-sa
6+
name: facebook-llm-d-modelservice-epp-sa
77
labels:
88
helm.sh/chart: llm-d-modelservice-0.0.1
99
app.kubernetes.io/version: "0.0.1"
@@ -14,7 +14,7 @@ automountServiceAccountToken: true
1414
apiVersion: v1
1515
kind: ServiceAccount
1616
metadata:
17-
name: facebook-sa
17+
name: facebook-llm-d-modelservice-sa
1818
labels:
1919
helm.sh/chart: llm-d-modelservice-0.0.1
2020
app.kubernetes.io/version: "0.0.1"
@@ -65,21 +65,23 @@ spec:
6565
llm-d.ai/model: facebook
6666
llm-d.ai/role: decode
6767
spec:
68-
serviceAccountName: facebook-sa
68+
serviceAccountName: facebook-llm-d-modelservice-sa
69+
6970
initContainers:
70-
- args:
71-
- --port=8000
72-
- --vllm-port=8200
73-
- --connector=nixlv2
74-
- -v=6
75-
image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6
76-
imagePullPolicy: Always
77-
name: routing-proxy
78-
ports:
79-
- containerPort: 8000
71+
- name: routing-proxy
72+
args:
73+
- --port=8000
74+
- --vllm-port=8200
75+
- --connector=nixlv2
76+
- -v=5
77+
image:
78+
imagePullPolicy: Always
79+
ports:
80+
- containerPort: 8000
8081
protocol: TCP
81-
restartPolicy: Always
82-
securityContext:
82+
resources: {}
83+
restartPolicy: Always
84+
securityContext:
8385
allowPrivilegeEscalation: false
8486
runAsNonRoot: true
8587
containers:
@@ -113,10 +115,11 @@ spec:
113115
value: /model-cache
114116
resources:
115117
limits:
116-
{}
118+
nvidia.com/gpu: 1
117119
requests:
118120
cpu: "16"
119121
memory: 16Gi
122+
nvidia.com/gpu: 1
120123

121124
volumeMounts:
122125
- name: model-storage
@@ -149,7 +152,7 @@ spec:
149152
app.kubernetes.io/version: "0.0.1"
150153
app.kubernetes.io/managed-by: Helm
151154
spec:
152-
serviceAccountName: facebook-epp-sa
155+
serviceAccountName: facebook-llm-d-modelservice-epp-sa
153156
---
154157
# Source: llm-d-modelservice/templates/epp-deployment.yaml
155158
apiVersion: apps/v1
@@ -235,8 +238,8 @@ spec:
235238
- containerPort: 9090
236239
name: metrics
237240
protocol: TCP
238-
serviceAccount: facebook-epp-sa
239-
serviceAccountName: facebook-epp-sa
241+
serviceAccount: facebook-llm-d-modelservice-epp-sa
242+
serviceAccountName: facebook-llm-d-modelservice-epp-sa
240243
readinessProbe:
241244
grpc:
242245
port: 9003
@@ -279,7 +282,7 @@ spec:
279282
llm-d.ai/model: facebook
280283
llm-d.ai/role: prefill
281284
spec:
282-
serviceAccountName: facebook-sa
285+
serviceAccountName: facebook-llm-d-modelservice-sa
283286
containers:
284287
- name: vllm
285288
image: ghcr.io/llm-d/llm-d:0.0.8
@@ -307,10 +310,11 @@ spec:
307310
value: DEBUG
308311
resources:
309312
limits:
310-
{}
313+
nvidia.com/gpu: 1
311314
requests:
312315
cpu: "16"
313316
memory: 16Gi
317+
nvidia.com/gpu: 1
314318

315319
volumes:
316320
- name: model-storage

helm/templates/examples/values-facebook.yaml

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,10 @@ httpRoute: true
88
routing:
99
# This is the model name for the OpenAI request
1010
modelName: facebook/opt-125m
11-
ports:
12-
servicePort: 8000 # Sidecar listens on this port for requests. If there's no sidecar, the request goes here
13-
internalPort: 8200 # Sidecar forwards request to vllm container on this port
14-
proxy:
15-
targetPort: 8000
11+
servicePort: 8000 # Sidecar listens on this port for requests. If there's no sidecar, the request goes here
12+
proxy:
13+
image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6
14+
targetPort: 8200
1615
parentRefs:
1716
- group: gateway.networking.k8s.io
1817
kind: Istio
@@ -27,26 +26,6 @@ modelArtifacts:
2726
decode:
2827
enableService: false
2928
replicas: 1
30-
# parallelism:
31-
# tensor: 3
32-
# data: 2
33-
# dataLocal: 1
34-
initContainers:
35-
- name: routing-proxy
36-
image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6
37-
imagePullPolicy: Always
38-
securityContext:
39-
allowPrivilegeEscalation: false
40-
runAsNonRoot: true
41-
args:
42-
- "--port=8000" # servicePort
43-
- "--vllm-port=8200" # internalPort
44-
- "--connector=nixlv2"
45-
- "-v=6"
46-
ports:
47-
- containerPort: 8000 # servicePort
48-
protocol: TCP
49-
restartPolicy: Always
5029
containers:
5130
- name: "vllm"
5231
image: "ghcr.io/llm-d/llm-d:0.0.8"

helm/templates/prefill-deployment.yaml

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,19 +81,25 @@ spec:
8181
readinessProbe:
8282
{{- toYaml . | nindent 10 }}
8383
{{- end }}
84-
{{- with .resources }}
8584
resources:
8685
limits:
87-
{{- if .limits -}}
88-
{{- omit .limits "nvidia.com/gpu" | toYaml | nindent 12 }}
86+
{{- $limits := dict -}}
87+
{{- if and .resources .resources.limits -}}
88+
{{- $limits = omit .resources.limits "nvidia.com/gpu" }}
89+
{{- if gt (len $limits) 0 }}
90+
{{- toYaml $limits | nindent 12 }}
8991
{{- end }}
90-
{{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}}
92+
{{- end }}
93+
nvidia.com/gpu: {{ $parallelism.tensor }}
9194
requests:
92-
{{- if .limits -}}
93-
{{- omit .requests "nvidia.com/gpu" | toYaml | nindent 12 }}
95+
{{- $requests := dict -}}
96+
{{- if and .resources .resources.requests -}}
97+
{{- $requests = omit .resources.requests "nvidia.com/gpu" }}
9498
{{- end }}
95-
{{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}}
96-
{{- end }}
99+
{{- if gt (len $requests) 0 }}
100+
{{- toYaml $requests | nindent 12 }}
101+
{{- end }}
102+
nvidia.com/gpu: {{ $parallelism.tensor }}
97103
{{- /* volumeMount */}}
98104
{{- include "llm-d-modelservice.mountModelVolumeVolumeMounts" . | nindent 8 }}
99105
{{- end }}

helm/templates/routing.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ spec:
5050
- group: inference.networking.x-k8s.io
5151
kind: InferencePool
5252
name: {{ include "llm-d-modelservice.fullname" . }}-inference-pool
53-
port: {{ .Values.routing.ports.servicePort }}
53+
port: {{ .Values.routing.servicePort }}
5454
weight: 1
5555
matches:
5656
- headers:

0 commit comments

Comments
 (0)