Skip to content
This repository was archived by the owner on Jul 24, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions helm/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# READMEs and other Markdown documentation
*.md
# Rendered templates
values-*.yaml
output-*.yaml
24 changes: 24 additions & 0 deletions helm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
apiVersion: v2
name: llm-d-modelservice
description: A Helm chart for ModelService

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# Keep the value quoted so YAML always reads it as a string rather than a number.
appVersion: "0.0.1"
203 changes: 203 additions & 0 deletions helm/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
{{/*
Chart name used in resource names and labels.
Uses .Values.nameOverride when it is set and the chart's own name otherwise.
The result is cut to 63 characters and a trailing "-" left by the cut is removed.
*/}}
{{- define "llm-d-modelservice.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified app name.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name or nameOverride>".
The result is truncated to 63 characters because some Kubernetes name fields
are limited to this (by the DNS naming spec); a trailing "-" is removed.
*/}}
{{- define "llm-d-modelservice.fullname" -}}
{{- $full := .Values.fullnameOverride }}
{{- if not $full }}
{{- $base := .Values.nameOverride | default .Chart.Name }}
{{- $full = .Release.Name }}
{{- if not (contains $base .Release.Name) }}
{{- $full = printf "%s-%s" .Release.Name $base }}
{{- end }}
{{- end }}
{{- $full | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Value for the helm.sh/chart label: "<chart name>-<chart version>".
"+" is replaced with "_", then the result is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "llm-d-modelservice.chart" -}}
{{- list .Chart.Name .Chart.Version | join "-" | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels.
Emits the chart label, the app version label (only when the chart declares an
appVersion) and the managed-by label.
*/}}
{{- define "llm-d-modelservice.labels" -}}
helm.sh/chart: {{ include "llm-d-modelservice.chart" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Sanitized model name (DNS compliant).
Derived from the release name in these steps:
  1. lower-case and trim surrounding whitespace;
  2. replace every character outside [a-z0-9_.-] with "-";
  3. strip leading and trailing runs of "-", "." and "_";
  4. replace each remaining "." with "-";
  5. keep at most the first 63 characters.
*/}}
{{- define "llm-d-modelservice.sanitizedModelName" -}}
{{- $clean := .Release.Name | lower | trim -}}
{{- $clean = regexReplaceAll "[^a-z0-9_.-]" $clean "-" -}}
{{- $clean = regexReplaceAll "^[\\-._]+" $clean "" -}}
{{- $clean = regexReplaceAll "[\\-._]+$" $clean "" -}}
{{- $clean = regexReplaceAll "\\." $clean "-" -}}
{{- $clean | trunc 63 -}}
{{- end }}

{{/*
Common P/D labels, included by both the prefill and the decode label helpers.
Marks the workload as inference serving and records the sanitized model name.
*/}}
{{- define "llm-d-modelservice.pdlabels" -}}
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: {{ include "llm-d-modelservice.sanitizedModelName" . }}
{{- end }}

{{/* Prefill labels: the common P/D labels plus the prefill role. */}}
{{- define "llm-d-modelservice.prefilllabels" -}}
{{- include "llm-d-modelservice.pdlabels" . }}
llm-d.ai/role: prefill
{{- end }}

{{/* Decode labels: the common P/D labels plus the decode role. */}}
{{- define "llm-d-modelservice.decodelabels" -}}
{{- include "llm-d-modelservice.pdlabels" . }}
llm-d.ai/role: decode
{{- end }}

{{/*
Affinity from acceleratorTypes.
Context: an object with `labelKey` and `labelValues`.
Emits a required node-affinity term that matches `labelKey` with the `In`
operator; the `values` list is written only when `labelValues` is non-empty.
The block starts at column 0 so that callers can place it with nindent.
*/}}
{{- define "llm-d-modelservice.acceleratorTypes" -}}
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
          - key: {{ .labelKey }}
            operator: In
            {{- if .labelValues }}
            values:
              {{- toYaml .labelValues | nindent 14 }}
            {{- end }}
{{- end }}

{{/*
Routing proxy -- sidecar for decode pods.
Context: an object with
  servicePort        port the proxy listens on (default 8080)
  proxy.targetPort   value of --vllm-port (default 8200)
  proxy.debugLevel   value of -v (default 5; an explicit 0 is honored)
  image              proxy container image
The `proxy` map may be omitted entirely; all of its defaults then apply.
Emits an `initContainers` list holding a single container with
restartPolicy: Always.
*/}}
{{- define "llm-d-modelservice.routingProxy" -}}
{{- /* A missing `proxy` map must fall back to defaults instead of failing on a nil lookup. */ -}}
{{- $proxy := .proxy | default dict -}}
{{- /* `default` treats 0 as unset, so an explicit debugLevel of 0 is restored here. */ -}}
{{- $verbosity := default 5 $proxy.debugLevel -}}
{{- if eq (toString $proxy.debugLevel) "0" -}}
{{- $verbosity = 0 -}}
{{- end -}}
initContainers:
  - name: routing-proxy
    args:
      - --port={{ default 8080 .servicePort }}
      - --vllm-port={{ default 8200 $proxy.targetPort }}
      - --connector=nixlv2
      - -v={{ $verbosity }}
    image: {{ .image }}
    imagePullPolicy: Always
    ports:
      - containerPort: {{ default 8080 .servicePort }}
        protocol: TCP
    resources: {}
    restartPolicy: Always
    securityContext:
      allowPrivilegeEscalation: false
      runAsNonRoot: true
{{- end }}

{{/*
Effective parallelism settings, rendered as YAML.
Context: an optional object with `tensor` and `data`.
Both settings start at 1; a non-empty value in the context replaces the
corresponding default. The decode Deployment template parses the output
with fromYaml.
*/}}
{{- define "llm-d-modelservice.parallelism" -}}
{{- $settings := dict "tensor" 1 "data" 1 -}}
{{- if and . .tensor }}
{{- $_ := set $settings "tensor" .tensor -}}
{{- end }}
{{- if and . .data }}
{{- $_ := set $settings "data" .data -}}
{{- end }}
{{- toYaml $settings | nindent 0 }}
{{- end }}

{{/*
Container resources with the GPU count taken from tensor parallelism.
Context: an object with `resources` (optional, with optional `limits` and
`requests` maps) and `parallelism.tensor`.
For both limits and requests, the user-supplied entries are emitted without
any "nvidia.com/gpu" key, followed by "nvidia.com/gpu" set to
`parallelism.tensor`.
*/}}
{{- define "llm-d-modelservice.resources" -}}
{{- /* Captured up front: inside range/with the dot is rebound. */ -}}
{{- $gpus := .parallelism.tensor -}}
{{- $given := .resources | default dict -}}
resources:
{{- range $section := list "limits" "requests" }}
  {{ $section }}:
  {{- with omit (get $given $section | default dict) "nvidia.com/gpu" }}
  {{- toYaml . | nindent 4 }}
  {{- end }}
    nvidia.com/gpu: {{ $gpus }}
{{- end }}
{{- end }}

{{/* P/D service account name: "<sanitized model name>-sa". */}}
{{- define "llm-d-modelservice.pdServiceAccountName" -}}
{{- printf "%s-sa" (include "llm-d-modelservice.sanitizedModelName" .) }}
{{- end }}

{{/* EPP service account name: "<sanitized model name>-epp-sa". */}}
{{- define "llm-d-modelservice.eppServiceAccountName" -}}
{{- printf "%s-epp-sa" (include "llm-d-modelservice.sanitizedModelName" .) }}
{{- end }}

{{/*
EPP selector labels.
Combines the chart name, the release name and an EPP marker label whose
value is "<fullname>-epp".
*/}}
{{- define "llm-d-modelservice.eppSelectorLabels" -}}
app.kubernetes.io/name: {{ include "llm-d-modelservice.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
llm-d.ai/epp: {{ printf "%s-epp" (include "llm-d-modelservice.fullname" .) }}
{{- end }}

{{/*
Volumes for PD containers based on the model artifact prefix.
Reads .Values.modelArtifacts and emits one `model-storage` volume:
  prefix "hf"   emptyDir with sizeLimit from `size` (default "0")
  prefix "pvc"  read-only persistentVolumeClaim named by `artifact`
  prefix "oci"  image volume referencing `artifact`, with pullPolicy from
                `imagePullPolicy` (default "Always")
Any other prefix produces no output.
*/}}
{{- define "llm-d-modelservice.mountModelVolumeVolumes" -}}
{{- $artifacts := .Values.modelArtifacts -}}
{{- if eq $artifacts.prefix "hf" -}}
- name: model-storage
  emptyDir:
    sizeLimit: {{ default "0" $artifacts.size }}
{{- else if eq $artifacts.prefix "pvc" }}
- name: model-storage
  persistentVolumeClaim:
    claimName: {{ $artifacts.artifact }}
    readOnly: true
{{- else if eq $artifacts.prefix "oci" }}
- name: model-storage
  image:
    reference: {{ $artifacts.artifact }}
    pullPolicy: {{ default "Always" $artifacts.imagePullPolicy }}
{{- end }}
{{- end }}

{{/*
VolumeMounts for a PD container.
Context: a single container object from values.
Emits the `volumeMounts:` key when the container defines `volumeMounts` or sets
`mountModelVolume`. User-supplied mounts are written first; when
`mountModelVolume` is true, a `model-storage` mount at /model-cache is appended.
NOTE(review): the user-supplied mounts are indented with nindent 8 here and the
decode Deployment template applies nindent 8 again to the whole result; confirm
that both kinds of entries end up at the same indentation.
*/}}
{{- define "llm-d-modelservice.mountModelVolumeVolumeMounts" -}}
{{- if or .volumeMounts .mountModelVolume }}
volumeMounts:
{{- end }}
{{- /* volume mounts supplied by the user in values */}}
{{- with .volumeMounts }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- /* what we add if mountModelVolume is true */}}
{{- if .mountModelVolume }}
- name: model-storage
mountPath: /model-cache
{{- end }}
{{- end }}
106 changes: 106 additions & 0 deletions helm/templates/decode-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
{{- /*
Decode Deployment.
Rendered only when .Values.decode is set. Creates "<fullname>-decode" with the
decode labels as selector and pod labels, one container per entry in
.Values.decode.containers, and the model-storage volume chosen by
.Values.modelArtifacts.
*/}}
{{- if .Values.decode }}
{{- /* Resolved inside the guard: reading .Values.decode.parallelism while `decode` is unset aborts rendering. */}}
{{- $parallelism := (include "llm-d-modelservice.parallelism" .Values.decode.parallelism) | fromYaml }}
{{- /* `default` treats 0 as unset, so an explicit `replicas: 0` is restored here. */}}
{{- $replicas := default 1 .Values.decode.replicas }}
{{- if eq (toString .Values.decode.replicas) "0" }}
{{- $replicas = 0 }}
{{- end }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "llm-d-modelservice.fullname" . }}-decode
  labels:
    {{- include "llm-d-modelservice.labels" . | nindent 4 }}
spec:
  replicas: {{ $replicas }}
  selector:
    matchLabels:
      {{- include "llm-d-modelservice.decodelabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "llm-d-modelservice.decodelabels" . | nindent 8 }}
    spec:
      {{- with .Values.decode.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "llm-d-modelservice.pdServiceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.decode.acceleratorTypes }}
      {{- include "llm-d-modelservice.acceleratorTypes" . | nindent 6 }}
      {{- end }}
      {{- /* initContainers */}}
      {{- with .Values.decode.initContainers }}
      initContainers:
        {{- toYaml . | nindent 6 }}
      {{- end }}
      {{- /* one container per entry in .Values.decode.containers */}}
      {{- with .Values.decode.containers }}
      containers:
      {{- range . }}
      - name: {{ default "vllm" .name }}
        image: {{ required "image of container is required" .image }}
        {{- with .securityContext }}
        securityContext:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .imagePullPolicy }}
        imagePullPolicy: {{ . }}
        {{- end }}
        {{- with .command }}
        command:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .args }}
        args:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- /* insert user's env for this container */}}
        {{- if or .env .mountModelVolume }}
        env:
        {{- end }}
        {{- with .env }}
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- /* envs added when the container mounts the model volume */}}
        {{- if .mountModelVolume }}
          - name: HF_HOME
            value: /model-cache
          {{- with $.Values.modelArtifacts.authSecretName }}
          - name: HF_TOKEN
            valueFrom:
              secretKeyRef:
                name: {{ . }}
                key: HF_TOKEN
          {{- end }}
        {{- end }}
        {{- with .livenessProbe }}
        livenessProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .readinessProbe }}
        readinessProbe:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- with .resources }}
        resources:
          limits:
            {{- if .limits -}}
            {{- omit .limits "nvidia.com/gpu" | toYaml | nindent 12 }}
            {{- end }}
            {{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}}
          requests:
            {{- /* guard on .requests (not .limits) so requests render exactly when they are set */}}
            {{- if .requests -}}
            {{- omit .requests "nvidia.com/gpu" | toYaml | nindent 12 }}
            {{- end }}
            {{- /* nvidia.com/gpu: "{{ $parallelism.tensor }}" */}}
        {{- end }}
        {{- /* volumeMount */}}
        {{- include "llm-d-modelservice.mountModelVolumeVolumeMounts" . | nindent 8 }}
      {{- end }}
      {{- end }}
      volumes:
      {{- with .Values.decode.volumes }}
        {{- toYaml . | nindent 8 }}
      {{- end -}}
      {{- include "llm-d-modelservice.mountModelVolumeVolumes" . | nindent 8 }}
{{- end }}
Loading