This repository was archived by the owner on Jul 24, 2025. It is now read-only.

Commit 7e94af4

Working example in kind

Signed-off-by: Jing Chen <[email protected]>

1 parent e1e09ca commit 7e94af4
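
For context, here is one way to exercise this example end to end in a kind cluster. This is a minimal sketch, not taken from the commit itself: the chart path (./helm), release name, and namespace are assumptions.

# Create a local kind cluster (no GPUs, so use the vLLM-simulator values).
kind create cluster --name llm-d-demo

# Install the chart with the simulator example from this commit.
helm install llm-d-sim ./helm \
  --namespace llm-d --create-namespace \
  -f helm/examples/values-vllm-sim.yaml

# Once the pods are Ready, forward the inference gateway locally so the
# curl request added at the bottom of values-vllm-sim.yaml can reach it.
# The service name and port are guesses based on the routing.gateway values.
kubectl -n llm-d port-forward svc/llm-d-inference-gateway 8000:8000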

File tree

5 files changed: +59 -35 lines changed


helm/examples/output-facebook.yaml

Lines changed: 10 additions & 0 deletions

@@ -111,10 +111,15 @@ spec:
           value: DEBUG
         - name: HF_HOME
           value: /model-cache
+
        resources:
          limits:
+            cpu: "16"
+            memory: 16Gi
            nvidia.com/gpu: "1"
          requests:
+            cpu: "16"
+            memory: 16Gi
            nvidia.com/gpu: "1"
        volumeMounts:
        - name: model-storage
@@ -280,10 +285,15 @@ spec:
              fieldPath: status.podIP
        - name: VLLM_LOGGING_LEVEL
          value: DEBUG
+
        resources:
          limits:
+            cpu: "16"
+            memory: 16Gi
            nvidia.com/gpu: "1"
          requests:
+            cpu: "16"
+            memory: 16Gi
            nvidia.com/gpu: "1"
      volumes:
      - name: model-storage
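
The change above pins explicit CPU and memory requests/limits alongside the GPU request. One quick way to verify they took effect after deploying is shown below; the namespace and label selector are hypothetical, not taken from this commit:

# Hypothetical namespace/selector; adjust to the actual release labels.
kubectl -n llm-d get pods -l app.kubernetes.io/name=llm-d-modelservice \
  -o jsonpath='{range .items[*]}{.metadata.name}{": "}{.spec.containers[0].resources}{"\n"}{end}'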

helm/examples/output-vllm-sim.yaml

Lines changed: 12 additions & 6 deletions

@@ -92,13 +92,16 @@ spec:
        env:
        - name: HF_HOME
          value: /model-cache
+
+        resources:
+          limits:
+            {}
+          requests:
+            {}
        volumeMounts:
        - name: model-storage
          mountPath: /model-cache
      volumes:
-      - name: model-storage
-        emptyDir:
-          sizeLimit: 5Mi
 ---
 # Source: llm-d-modelservice/templates/epp-deployment.yaml
 apiVersion: apps/v1
@@ -223,13 +226,16 @@ spec:
        env:
        - name: HF_HOME
          value: /model-cache
+
+        resources:
+          limits:
+            {}
+          requests:
+            {}
        volumeMounts:
        - name: model-storage
          mountPath: /model-cache
      volumes:
-      - name: model-storage
-        emptyDir:
-          sizeLimit: 5Mi
 ---
 # Source: llm-d-modelservice/templates/routing.yaml
 apiVersion: gateway.networking.k8s.io/v1
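
These output-*.yaml files look like captured chart renders. Assuming that is how they are maintained, they can be regenerated after a values change with helm template; the release name here is a guess:

helm template llm-d-modelservice ./helm \
  -f helm/examples/values-vllm-sim.yaml \
  > helm/examples/output-vllm-sim.yaml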

helm/examples/values-facebook.yaml

Lines changed: 0 additions & 1 deletion

@@ -19,7 +19,6 @@ routing:
 
 modelArtifacts:
   uri: "hf://facebook/opt-125m"
-  size: 5Mi
 
 # describe decode pods
 decode:

helm/examples/values-vllm-sim.yaml

Lines changed: 14 additions & 5 deletions

@@ -1,4 +1,4 @@
-# This values.yaml file creates the resources for facebook/opt-125m
+# This values.yaml file creates the resources for random
 
 multinode: false # If true, creates LWS instead of deployments
 inferencePool: true
@@ -7,7 +7,7 @@ httpRoute: true
 
 routing:
   # This is the model name for the OpenAI request
-  modelName: facebook/opt-125m
+  modelName: random
   servicePort: 8000 # Sidecar listens on this port for requests. If there's no sidecar, the request goes here
   proxy:
     image: ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6
@@ -18,7 +18,7 @@ routing:
     name: llm-d-inference-gateway
 
 modelArtifacts:
-  uri: "ht://facebook/opt-125m"
+  uri: "hf://random"
   size: 5Mi
 
 # describe decode pods
@@ -29,7 +29,7 @@ decode:
   image: "ghcr.io/llm-d/llm-d-inference-sim:0.0.4"
   args:
     - "--model"
-    - "facebook/opt-125m"
+    - "random"
    - "--port"
    - "8200" # targetPort
  ports:
@@ -43,7 +43,7 @@ prefill:
   image: "ghcr.io/llm-d/llm-d-inference-sim:0.0.4"
   args:
     - "--model"
-    - "facebook/opt-125m"
+    - "random"
    - "--port"
    - "8000" # servicePort
  ports:
@@ -67,3 +67,12 @@ endpointPicker:
   autoscaling:
     enabled: false
   replicas: 1
+
+
+curl http://localhost:8000/v1/completions -vvv \
+  -H "Content-Type: application/json" \
+  -H "x-model-name: facebook/opt-125m" \
+  -d '{
+    "model": "facebook/opt-125m",
+    "prompt": "Hello, "
+  }'
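
The curl appended above assumes the gateway is already reachable on localhost:8000, e.g. via the port-forward sketched earlier. Assuming the simulator returns an OpenAI-compatible completions payload, the generated text can be pulled out with jq:

# Same request, extracting just the completion text (OpenAI response shape).
curl -s http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -H "x-model-name: facebook/opt-125m" \
  -d '{"model": "facebook/opt-125m", "prompt": "Hello, "}' \
  | jq -r '.choices[0].text'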

helm/templates/epp-deployment.yaml

Lines changed: 23 additions & 23 deletions

@@ -81,27 +81,27 @@ spec:
        - containerPort: 9090
          name: metrics
          protocol: TCP
+        {{- if (not .Values.endpointPicker.disableReadinessProbe) }}
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: envoy.service.ext_proc.v3.ExternalProcessor
+          initialDelaySeconds: 5
+          timeoutSeconds: 1
+          periodSeconds: 10
+          successThreshold: 1
+          failureThreshold: 3
+        {{- end }}
+        {{- if (not .Values.endpointPicker.disableLivenessProbe) }}
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: envoy.service.ext_proc.v3.ExternalProcessor
+          initialDelaySeconds: 5
+          timeoutSeconds: 1
+          periodSeconds: 10
+          successThreshold: 1
+          failureThreshold: 3
+        {{- end }}
      serviceAccount: {{ include "llm-d-modelservice.eppServiceAccountName" . }}
-      serviceAccountName: {{ include "llm-d-modelservice.eppServiceAccountName" . }}
-      {{- if (not .Values.endpointPicker.disableReadinessProbe) }}
-      readinessProbe:
-        grpc:
-          port: 9003
-          service: envoy.service.ext_proc.v3.ExternalProcessor
-        initialDelaySeconds: 5
-        timeoutSeconds: 1
-        periodSeconds: 10
-        successThreshold: 1
-        failureThreshold: 3
-      {{- end }}
-      {{- if (not .Values.endpointPicker.disableLivenessProbe) }}
-      livenessProbe:
-        grpc:
-          port: 9003
-          service: envoy.service.ext_proc.v3.ExternalProcessor
-        initialDelaySeconds: 5
-        timeoutSeconds: 1
-        periodSeconds: 10
-        successThreshold: 1
-        failureThreshold: 3
-      {{- end }}
+      serviceAccountName: {{ include "llm-d-modelservice.eppServiceAccountName" . }}