Skip to content
This repository was archived by the owner on Jul 24, 2025. It is now read-only.

Commit c031edd

Browse files
authored
Merge pull request #1 from jgchn/helm-mk-1
epp things
2 parents 3cd169b + 9c4102f commit c031edd

File tree

8 files changed

+287
-56
lines changed

8 files changed

+287
-56
lines changed

helm/templates/NOTES.txt

Lines changed: 0 additions & 22 deletions
This file was deleted.

helm/templates/_helpers.tpl

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,27 +35,39 @@ Common labels
3535
*/}}
3636
{{- define "llm-d-modelservice.labels" -}}
3737
helm.sh/chart: {{ include "llm-d-modelservice.chart" . }}
38-
{{ include "llm-d-modelservice.selectorLabels" . }}
38+
{{ include "llm-d-modelservice.eppSelectorLabels" . }}
3939
{{- if .Chart.AppVersion }}
4040
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
4141
{{- end }}
4242
app.kubernetes.io/managed-by: {{ .Release.Service }}
4343
{{- end }}
4444

4545
{{/*
46-
Selector labels
46+
EPP selector labels
4747
*/}}
48-
{{- define "llm-d-modelservice.selectorLabels" -}}
48+
{{- define "llm-d-modelservice.eppSelectorLabels" -}}
4949
app.kubernetes.io/name: {{ include "llm-d-modelservice.name" . }}
5050
app.kubernetes.io/instance: {{ .Release.Name }}
51+
llm-d.ai/epp: {{ include "llm-d-modelservice.fullname" . }}-epp
5152
{{- end }}
5253

5354
{{/*
5455
Create the name of the service account to use
5556
*/}}
5657
{{- define "llm-d-modelservice.serviceAccountName" -}}
5758
{{- if .Values.serviceAccount.create }}
58-
{{- default (include "llm-d-modelservice.fullname" .) .Values.serviceAccount.name }}
59+
{{- (include "llm-d-modelservice.fullname" .) -}}-sa
60+
{{- else }}
61+
{{- default "default" .Values.serviceAccount.name }}
62+
{{- end }}
63+
{{- end }}
64+
65+
{{/*
66+
Create the name of the EPP service account to use
67+
*/}}
68+
{{- define "llm-d-modelservice.eppServiceAccountName" -}}
69+
{{- if .Values.serviceAccount.create }}
70+
{{- (include "llm-d-modelservice.fullname" .) -}}-epp-sa
5971
{{- else }}
6072
{{- default "default" .Values.serviceAccount.name }}
6173
{{- end }}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
{{- /*
Deployment for the endpoint picker (EPP).

Values read from .Values.endpointPicker:
  autoscaling.enabled       when true, `replicas` is omitted from the spec
  replicaCount / replicas   replica count (`replicaCount` wins when both are set)
  imagePullSecrets, podSecurityContext, securityContext,
  volumes, nodeSelector, affinity, tolerations   copied through verbatim
  containers                list; each item may set name, image, imagePullPolicy,
                            command, args, env, livenessProbe, readinessProbe,
                            resources, volumeMounts
  service.port              exposed as the `http2` containerPort of every container
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "llm-d-modelservice.fullname" . }}-epp
  labels:
    {{- include "llm-d-modelservice.labels" . | nindent 4 }}
spec:
  {{- if not .Values.endpointPicker.autoscaling.enabled }}
  {{- /*
  `replicaCount` keeps its original meaning. When it is absent, fall back to
  `replicas`, which is the key the sample values file appears to use.
  NOTE(review): confirm which of the two keys is the intended one.
  */}}
  replicas: {{ if hasKey .Values.endpointPicker "replicaCount" }}{{ .Values.endpointPicker.replicaCount }}{{ else }}{{ .Values.endpointPicker.replicas }}{{ end }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "llm-d-modelservice.eppSelectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "llm-d-modelservice.labels" . | nindent 8 }}
    spec:
      {{- with .Values.endpointPicker.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "llm-d-modelservice.eppServiceAccountName" . }}
      {{- with .Values.endpointPicker.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- /*
      The `containers:` key is emitted once, outside the loop. Emitting it per
      iteration produced duplicate keys as soon as two containers were listed.
      */}}
      containers:
        {{- range $.Values.endpointPicker.containers }}
        - name: {{ .name }}
          {{- with $.Values.endpointPicker.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          image: "{{ .image }}"
          {{- with .imagePullPolicy }}
          imagePullPolicy: {{ . }}
          {{- end }}
          {{- with .command }}
          command:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          args:
            # Flag names carry a leading dash, like the user-supplied flags
            # appended below; a bare word would be read as a positional argument.
            # NOTE(review): POOLNAME is still a placeholder, not a real pool name.
            - -poolName
            - POOLNAME
            - -poolNamespace
            - {{ $.Release.Namespace | quote }}
            {{- with .args }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- with .env }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          ports:
            - name: http2
              containerPort: {{ $.Values.endpointPicker.service.port }}
              protocol: TCP
          {{- with .livenessProbe }}
          livenessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .readinessProbe }}
          readinessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
        {{- end }} {{/* range $.Values.endpointPicker.containers */}}
      {{- with .Values.endpointPicker.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.endpointPicker.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.endpointPicker.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.endpointPicker.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

helm/templates/epp-sa.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{{- /*
ServiceAccount for the endpoint picker (EPP).

Rendered only when .Values.serviceAccount.create is true, which is the same
switch the "llm-d-modelservice.eppServiceAccountName" helper uses to pick the name.
Annotations and the automount flag come from .Values.eppServiceAccount.
That block is optional: when it is missing, no annotations are rendered and
automountServiceAccountToken is left unset.
*/ -}}
{{- if .Values.serviceAccount.create -}}
{{- /* Fall back to an empty dict so a values file without `eppServiceAccount` still renders. */ -}}
{{- $eppSA := .Values.eppServiceAccount | default dict -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "llm-d-modelservice.eppServiceAccountName" . }}
  labels:
    {{- include "llm-d-modelservice.labels" . | nindent 4 }}
  {{- with $eppSA.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- /* hasKey (rather than a truthiness check) keeps an explicit `automount: false`. */}}
{{- if hasKey $eppSA "automount" }}
automountServiceAccountToken: {{ $eppSA.automount }}
{{- end }}
{{- end }}

helm/templates/epp-service.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{{- /*
Service in front of the endpoint picker (EPP) pods.

Values read from .Values.endpointPicker.service:
  type         Service type; falls back to the legacy .Values.service.type,
               then to ClusterIP
  port         port exposed by the Service
  targetPort   port on the pod; defaults to `port` when unset
  appProtocol  optional; omitted from the manifest when unset
The selector uses the EPP selector labels, the same ones the EPP Deployment
puts in its matchLabels.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "llm-d-modelservice.fullname" . }}-epp
  labels:
    {{- include "llm-d-modelservice.labels" . | nindent 4 }}
spec:
  {{- /* `.Values.service` may be absent; an empty dict avoids a nil-pointer render error. */}}
  {{- $legacySvc := .Values.service | default dict }}
  type: {{ .Values.endpointPicker.service.type | default $legacySvc.type | default "ClusterIP" }}
  ports:
    - port: {{ .Values.endpointPicker.service.port }}
      targetPort: {{ .Values.endpointPicker.service.targetPort | default .Values.endpointPicker.service.port }}
      protocol: TCP
      {{- with .Values.endpointPicker.service.appProtocol }}
      appProtocol: {{ . }}
      {{- end }}
  selector:
    {{- include "llm-d-modelservice.eppSelectorLabels" . | nindent 4 }}

helm/templates/service.yaml

Lines changed: 0 additions & 15 deletions
This file was deleted.

helm/templates/tests/test-connection.yaml

Lines changed: 0 additions & 15 deletions
This file was deleted.

helm/values-msvc-mk.yaml

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# TODO
2+
# decoupleScaling: false
3+
4+
lws: false # If true, creates LWS instead of deployments
5+
inferencePool: true
6+
inferenceModel: true
7+
httpRoute: true
8+
9+
routing:
10+
# This is the model name for the OpenAI request
11+
modelName: facebook/opt-125m
12+
ports:
13+
servicePort: 8000 # Sidecar listens on this port for requests. If there's no sidecar, the request goes here
14+
internalPort: 8200 # Sidecar forwards request to vllm container on this port
15+
16+
modelArtifacts:
17+
# When specifying the URI with the `hf` prefix, the <repo-id>/<model-id> string
18+
# is extracted and exposed as a template variable that can be used as {{ .HFModelName }}
19+
20+
# uri: hf://facebook/opt-125m
21+
type: hf # oneOf ["hf", "oci", "pvc"]
22+
artficat: facebook/opt-125m
23+
authSecretName: "hf-secret"
24+
size: 5Mi
25+
26+
# describe decode pods
27+
decode:
28+
enableService: false
29+
replicas: 1
30+
31+
# for LWS
32+
parallelism:
33+
tensor: 8
34+
data: 16
35+
dataLocal: 1
36+
37+
acceleratorTypes:
38+
labelKey: nvidia.com/gpu.product
39+
labelValues:
40+
# According to the blog, Scout requires H100s
41+
- NVIDIA-H100
42+
# initContainers:
43+
containers:
44+
- name: "vllm"
45+
image: "vllm-ai/vllm:latest"
46+
args:
47+
- "HFModelName"
48+
env:
49+
- name: "VLLM_LOG_LEVEL"
50+
value: "DEBUG" # Set to DEBUG for more detailed logs, or INFO for less verbose logs
51+
envFrom:
52+
- configMapRef:
53+
name: vllm-config
54+
resources:
55+
requests:
56+
cpu: "1" # Request 1 CPU core
57+
memory: "4Gi" # Request 4 GiB of memory
58+
limits:
59+
cpu: "2" # Limit to 2 CPU cores
60+
memory: "8Gi" # Limit to 8 GiB of memory
61+
mountModelVolume: true
62+
63+
# describe the prefill pods (looks the same as above)
64+
prefill:
65+
replicas: 1
66+
containers:
67+
- name: "vllm"
68+
args:
69+
- "HFModelName"
70+
71+
endpointPicker:
72+
# This sets up a Service. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
73+
service:
74+
# This sets the Service type. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
75+
type: ClusterIP
76+
# This sets the ports. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
77+
port: 9002
78+
targetPort: 9002
79+
appProtocol: http2
80+
81+
# enableService: true
82+
83+
autoscaling:
84+
enabled: false
85+
replicas: 1
86+
87+
containers:
88+
- name: "epp"
89+
image: "ghcr.io/llm-d/llm-d-inference-scheduler:0.0.3"
90+
# command:
91+
args:
92+
# - -poolName
93+
# - InferencePoolName
94+
# - -poolNamespace
95+
# - llmd-kalantar
96+
- -v
97+
- "5"
98+
- --zap-encoder
99+
- json
100+
- -grpcPort
101+
- "9002"
102+
- -grpcHealthPort
103+
- "9003"
104+
env:
105+
- name: PD_ENABLED
106+
value: "true"
107+
- name: PD_PROMPT_LEN_THRESHOLD
108+
value: "10"
109+
ports:
110+
- containerPort: 9002
111+
protocol: TCP
112+
- containerPort: 9003
113+
protocol: TCP
114+
- containerPort: 9090
115+
name: metrics
116+
protocol: TCP
117+
livenessProbe:
118+
failureThreshold: 3
119+
grpc:
120+
port: 9003
121+
service: envoy.service.ext_proc.v3.ExternalProcessor
122+
initialDelaySeconds: 5
123+
periodSeconds: 10
124+
readinessProbe:
125+
failureThreshold: 3
126+
grpc:
127+
port: 9003
128+
service: envoy.service.ext_proc.v3.ExternalProcessor
129+
initialDelaySeconds: 5
130+
periodSeconds: 10
131+
132+
133+
134+
135+
# This section builds out the service account. More information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
136+
serviceAccount:
137+
# Specifies whether a service account should be created
138+
create: true
139+
# Automatically mount a ServiceAccount's API credentials?
140+
automount: true
141+
# Annotations to add to the service account
142+
annotations: {}
143+
144+
# This section builds out the EPP service account. More information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
145+
eppServiceAccount:
146+
# Specifies whether a service account should be created
147+
create: true
148+
# Automatically mount a ServiceAccount's API credentials?
149+
automount: true
150+
# Annotations to add to the service account
151+
annotations: {}
152+

0 commit comments

Comments
 (0)