This repository was archived by the owner on Jul 24, 2025. It is now read-only.

Commit f1ba55b

Feedback
Signed-off-by: Jing Chen <[email protected]>
1 parent 368e846 commit f1ba55b

12 files changed (+131 / -401 lines)

helm/examples/output-facebook.yaml

Lines changed: 57 additions & 69 deletions
@@ -3,29 +3,27 @@
 apiVersion: v1
 kind: ServiceAccount
 metadata:
-  name: facebook-llm-d-modelservice-epp-sa
+  name: facebook-sim-test-llm-d-modelservice-epp-sa
   labels:
     helm.sh/chart: llm-d-modelservice-0.0.1
     app.kubernetes.io/version: "0.0.1"
     app.kubernetes.io/managed-by: Helm
-automountServiceAccountToken: true
 ---
 # Source: llm-d-modelservice/templates/serviceaccount.yaml
 apiVersion: v1
 kind: ServiceAccount
 metadata:
-  name: facebook-llm-d-modelservice-sa
+  name: facebook-sim-test-llm-d-modelservice-sa
   labels:
     helm.sh/chart: llm-d-modelservice-0.0.1
     app.kubernetes.io/version: "0.0.1"
     app.kubernetes.io/managed-by: Helm
-automountServiceAccountToken: true
 ---
 # Source: llm-d-modelservice/templates/epp-service.yaml
 apiVersion: v1
 kind: Service
 metadata:
-  name: facebook-llm-d-modelservice-epp
+  name: facebook-sim-test-llm-d-modelservice-epp-service
   labels:
     helm.sh/chart: llm-d-modelservice-0.0.1
     app.kubernetes.io/version: "0.0.1"
@@ -39,14 +37,14 @@ spec:
     appProtocol: http2
   selector:
     app.kubernetes.io/name: llm-d-modelservice
-    app.kubernetes.io/instance: facebook
-    llm-d.ai/epp: facebook-llm-d-modelservice-epp
+    app.kubernetes.io/instance: facebook-sim-test
+    llm-d.ai/epp: facebook-sim-test-llm-d-modelservice-epp
 ---
 # Source: llm-d-modelservice/templates/decode-deployment.yaml
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: facebook-llm-d-modelservice-decode
+  name: facebook-sim-test-llm-d-modelservice-decode
   labels:
     helm.sh/chart: llm-d-modelservice-0.0.1
     app.kubernetes.io/version: "0.0.1"
@@ -56,17 +54,15 @@ spec:
   selector:
     matchLabels:
       llm-d.ai/inferenceServing: "true"
-      llm-d.ai/model: facebook
+      llm-d.ai/model: facebook-sim-test
       llm-d.ai/role: decode
   template:
     metadata:
       labels:
         llm-d.ai/inferenceServing: "true"
-        llm-d.ai/model: facebook
+        llm-d.ai/model: facebook-sim-test
         llm-d.ai/role: decode
     spec:
-      serviceAccountName: facebook-llm-d-modelservice-sa
-
       initContainers:
       - name: routing-proxy
         args:
@@ -78,26 +74,27 @@ spec:
         imagePullPolicy: Always
         ports:
        - containerPort: 8000
-          protocol: TCP
         resources: {}
         restartPolicy: Always
         securityContext:
           allowPrivilegeEscalation: false
           runAsNonRoot: true
+
+      serviceAccountName: facebook-sim-test-llm-d-modelservice-sa
       containers:
-      - name: vllm
-        image: ghcr.io/llm-d/llm-d:0.0.8
-        command:
+      - name: vllm
+        image: ghcr.io/llm-d/llm-d:0.0.8
+        command:
         - vllm
         - serve
-        args:
+        args:
         - facebook/opt-125m
         - --port
         - "8200"
         - --enforce-eager
         - --kv-transfer-config
         - '{"kv_connector":"NixlConnector", "kv_role":"kv_both"}'
-        env:
+        env:
         - name: CUDA_VISIBLE_DEVICES
           value: "0"
         - name: UCX_TLS
@@ -114,17 +111,12 @@ spec:
           value: DEBUG
         - name: HF_HOME
           value: /model-cache
-        resources:
-          limits:
-            cpu: "16"
-            memory: 16Gi
-            nvidia.com/gpu: "1"
-          requests:
-            cpu: "16"
-            memory: 16Gi
-            nvidia.com/gpu: "1"
-
-        volumeMounts:
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
+          requests:
+            nvidia.com/gpu: "1"
+        volumeMounts:
         - name: model-storage
           mountPath: /model-cache
       volumes:
@@ -136,29 +128,29 @@ spec:
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: facebook-llm-d-modelservice-epp
+  name: facebook-sim-test-llm-d-modelservice-epp
   labels:
-    llm-d.ai/epp: facebook-llm-d-modelservice-epp
-  namespace: e2e-solution
+    llm-d.ai/epp: facebook-sim-test-llm-d-modelservice-epp
+  namespace: default
 spec:
   replicas: 1
   selector:
     matchLabels:
-      llm-d.ai/epp: facebook-llm-d-modelservice-epp
+      llm-d.ai/epp: facebook-sim-test-llm-d-modelservice-epp
   template:
     metadata:
       labels:
-        llm-d.ai/epp: facebook-llm-d-modelservice-epp
+        llm-d.ai/epp: facebook-sim-test-llm-d-modelservice-epp
     spec:
       containers:
      - name: epp
         imagePullPolicy: Always
         image: ghcr.io/llm-d/llm-d-inference-scheduler:0.0.3
         args:
         - --poolName
-        - facebook-llm-d-modelservice-inference-pool
+        - facebook-sim-test-llm-d-modelservice-inference-pool
         - --poolNamespace
-        - e2e-solution
+        - default
         - -v
         - "4"
         - --zap-encoder
@@ -216,8 +208,8 @@ spec:
         - containerPort: 9090
           name: metrics
           protocol: TCP
-      serviceAccount: facebook-llm-d-modelservice-epp-sa
-      serviceAccountName: facebook-llm-d-modelservice-epp-sa
+      serviceAccount: facebook-sim-test-llm-d-modelservice-epp-sa
+      serviceAccountName: facebook-sim-test-llm-d-modelservice-epp-sa
         readinessProbe:
           grpc:
             port: 9003
@@ -241,7 +233,7 @@ spec:
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: facebook-llm-d-modelservice-prefill
+  name: facebook-sim-test-llm-d-modelservice-prefill
   labels:
     helm.sh/chart: llm-d-modelservice-0.0.1
     app.kubernetes.io/version: "0.0.1"
@@ -251,30 +243,31 @@ spec:
   selector:
     matchLabels:
       llm-d.ai/inferenceServing: "true"
-      llm-d.ai/model: facebook
+      llm-d.ai/model: facebook-sim-test
       llm-d.ai/role: prefill
   template:
     metadata:
       labels:
         llm-d.ai/inferenceServing: "true"
-        llm-d.ai/model: facebook
+        llm-d.ai/model: facebook-sim-test
         llm-d.ai/role: prefill
     spec:
-      serviceAccountName: facebook-llm-d-modelservice-sa
+
+      serviceAccountName: facebook-sim-test-llm-d-modelservice-sa
       containers:
-      - name: vllm
-        image: ghcr.io/llm-d/llm-d:0.0.8
-        command:
+      - name: vllm
+        image: ghcr.io/llm-d/llm-d:0.0.8
+        command:
         - vllm
         - serve
-        args:
+        args:
         - facebook/opt-125m
         - --port
         - "8000"
         - --enforce-eager
         - --kv-transfer-config
         - '{"kv_connector":"NixlConnector", "kv_role":"kv_both"}'
-        env:
+        env:
         - name: CUDA_VISIBLE_DEVICES
           value: "0"
         - name: UCX_TLS
@@ -287,16 +280,11 @@ spec:
               fieldPath: status.podIP
         - name: VLLM_LOGGING_LEVEL
           value: DEBUG
-        resources:
-          limits:
-            cpu: "16"
-            memory: 16Gi
-            nvidia.com/gpu: "1"
-          requests:
-            cpu: "16"
-            memory: 16Gi
-            nvidia.com/gpu: "1"
-
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
+          requests:
+            nvidia.com/gpu: "1"
       volumes:
       - name: model-storage
         emptyDir:
@@ -306,22 +294,22 @@ spec:
 apiVersion: gateway.networking.k8s.io/v1
 kind: HTTPRoute
 metadata:
-  name: facebook-llm-d-modelservice-http-route
-  namespace: e2e-solution
+  name: facebook-sim-test-llm-d-modelservice-http-route
+  namespace: default
   labels:
     helm.sh/chart: llm-d-modelservice-0.0.1
     app.kubernetes.io/version: "0.0.1"
     app.kubernetes.io/managed-by: Helm
 spec:
   parentRefs:
   - group: gateway.networking.k8s.io
-    kind: Istio
+    kind: Gateway
     name: inference-gateway
   rules:
   - backendRefs:
     - group: inference.networking.x-k8s.io
       kind: InferencePool
-      name: facebook-llm-d-modelservice-inference-pool
+      name: facebook-sim-test-llm-d-modelservice-inference-pool
       port: 8000
       weight: 1
     matches:
@@ -337,31 +325,31 @@ spec:
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferenceModel
 metadata:
-  name: facebook-llm-d-modelservice-inference-model
-  namespace: e2e-solution
+  name: facebook-sim-test-llm-d-modelservice-inference-model
+  namespace: default
   labels:
     llm-d.ai/inferenceServing: "true"
-    llm-d.ai/model: facebook
+    llm-d.ai/model: facebook-sim-test
 spec:
   modelName: facebook/opt-125m
   poolRef:
     group: inference.networking.x-k8s.io
     kind: InferencePool
-    name: facebook-llm-d-modelservice-inference-pool
+    name: facebook-sim-test-llm-d-modelservice-inference-pool
 ---
 # Source: llm-d-modelservice/templates/routing.yaml
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
-  name: facebook-llm-d-modelservice-inference-pool
-  namespace: e2e-solution
+  name: facebook-sim-test-llm-d-modelservice-inference-pool
+  namespace: default
 spec:
   extensionRef:
     failureMode: FailClose
     group: ""
     kind: Service
-    name: facebook-llm-d-modelservice-epp-service
+    name: facebook-sim-test-llm-d-modelservice-epp-service
   selector:
     llm-d.ai/inferenceServing: "true"
-    llm-d.ai/model: facebook
+    llm-d.ai/model: facebook-sim-test
   targetPortNumber: 8000
