Skip to content

Commit c3d19f8

Browse files
adding chart text-generation-inference-0.2.1
1 parent 9d7a13f commit c3d19f8

File tree

13 files changed

+529
-0
lines changed

13 files changed

+529
-0
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
apiVersion: v2
2+
appVersion: 1.16.0
3+
description: A Rust, Python and gRPC server for text generation inference by huggingface
4+
on Intel GPUs.
5+
maintainers:
6+
7+
name: tylertitsworth
8+
url: https://github.com/tylertitsworth
9+
name: text-generation-inference
10+
type: application
11+
version: 0.2.1
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Text Generation Inference on Intel GPU
2+
3+
A Rust, Python and gRPC server for text generation inference by huggingface on Intel GPUs.
4+
5+
For more information about how to use Huggingface text-generation-inference with Intel optimizations, check out [huggingface's documentation](https://huggingface.co/docs/text-generation-inference/installation_intel).
6+
7+
> [!TIP]
8+
> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
9+
10+
![Version: 0.2.1](https://img.shields.io/badge/Version-0.2.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
11+
12+
## Values
13+
14+
| Key | Type | Default | Description |
15+
|-----|------|---------|-------------|
16+
| deploy.configMap | object | `{"enabled":true,"name":"tgi-config"}` | ConfigMap of Environment Variables |
17+
| deploy.image | string | `"ghcr.io/huggingface/text-generation-inference:latest-intel"` | Intel TGI Image |
18+
| deploy.replicaCount | int | `1` | Number of pods |
19+
| deploy.resources | object | `{"limits":{"cpu":"4000m","gpu.intel.com/i915":1},"requests":{"cpu":"1000m","memory":"1Gi"}}` | Resource configuration |
20+
| deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration |
21+
| fullnameOverride | string | `""` | Fully qualified Domain Name |
22+
| ingress | object | `{"annotations":{},"className":"","enabled":false,"hosts":[{"host":"chart-example.local","paths":[{"path":"/","pathType":"ImplementationSpecific"}]}],"tls":[]}` | Ingress configuration |
23+
| nameOverride | string | `""` | Name of the serving service |
24+
| pvc | object | `{"create":true,"name":"model-server-cache","size":"15Gi","storageClassName":"nil"}` | Configure Storage Volume |
25+
| pvc.create | bool | `true` | Create a new PVC |
26+
| pvc.name | string | `"model-server-cache"` | Specify the name of either an existing or new PVC |
27+
| secret.encodedToken | string | `""` | Base64 Encoded Huggingface Hub API Token |
28+
| securityContext | object | `{}` | Security Context Configuration |
29+
| service | object | `{"port":80,"type":"NodePort"}` | Service configuration |
30+
31+
----------------------------------------------
32+
Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Text Generation Inference on Intel GPU
2+
3+
{{ template "chart.description" . }}
4+
5+
For more information about how to use Huggingface text-generation-inference with Intel optimizations, check out [huggingface's documentation](https://huggingface.co/docs/text-generation-inference/installation_intel).
6+
7+
> [!TIP]
8+
> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
9+
10+
{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }}
11+
12+
{{ template "chart.requirementsSection" . }}
13+
14+
{{ template "chart.valuesSection" . }}
15+
16+
{{ template "helm-docs.versionFooter" . }}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
1. Get the application URL by running these commands:
2+
{{- if .Values.ingress.enabled }}
3+
{{- range $host := .Values.ingress.hosts }}
4+
{{- range .paths }}
5+
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
6+
{{- end }}
7+
{{- end }}
8+
{{- else if contains "NodePort" .Values.service.type }}
9+
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "tgi.fullname" . }})
10+
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
11+
echo http://$NODE_IP:$NODE_PORT
12+
{{- else if contains "LoadBalancer" .Values.service.type }}
13+
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
14+
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "tgi.fullname" . }}'
15+
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "tgi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16+
echo http://$SERVICE_IP:{{ .Values.service.port }}
17+
{{- else if contains "ClusterIP" .Values.service.type }}
18+
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
19+
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
20+
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
21+
{{- end }}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{/*
16+
Expand the name of the chart.
17+
*/}}
18+
{{- define "tgi.name" -}}
19+
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
20+
{{- end }}
21+
22+
{{/*
23+
Create a default fully qualified app name.
24+
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
25+
If release name contains chart name it will be used as a full name.
26+
*/}}
27+
{{- define "tgi.fullname" -}}
28+
{{- if .Values.fullnameOverride }}
29+
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
30+
{{- else }}
31+
{{- $name := default .Chart.Name .Values.nameOverride }}
32+
{{- if contains $name .Release.Name }}
33+
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
34+
{{- else }}
35+
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
36+
{{- end }}
37+
{{- end }}
38+
{{- end }}
39+
40+
{{/*
41+
Create chart name and version as used by the chart label.
42+
*/}}
43+
{{- define "tgi.chart" -}}
44+
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
45+
{{- end }}
46+
47+
{{/*
48+
Common labels
49+
*/}}
50+
{{- define "tgi.labels" -}}
51+
helm.sh/chart: {{ include "tgi.chart" . }}
52+
{{ include "tgi.selectorLabels" . }}
53+
{{- if .Chart.AppVersion }}
54+
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
55+
{{- end }}
56+
app.kubernetes.io/managed-by: {{ .Release.Service }}
57+
{{- end }}
58+
59+
{{/*
60+
Selector labels
61+
*/}}
62+
{{- define "tgi.selectorLabels" -}}
63+
app.kubernetes.io/name: {{ include "tgi.name" . }}
64+
app.kubernetes.io/instance: {{ .Release.Name }}
65+
{{- end }}
66+
67+
{{/*
Create the name of the service account to use.

When serviceAccount.create is truthy, the name is serviceAccount.name, falling
back to the chart fullname. Otherwise it is serviceAccount.name, falling back
to "default".

The serviceAccount values block is read through an empty-dict fallback so that
this helper still renders (yielding "default") when the chart's values do not
define a serviceAccount key at all, instead of failing on a nil map lookup.
*/}}
{{- define "tgi.serviceAccountName" -}}
{{- $serviceAccount := default (dict) .Values.serviceAccount }}
{{- if $serviceAccount.create }}
{{- default (include "tgi.fullname" .) $serviceAccount.name }}
{{- else }}
{{- default "default" $serviceAccount.name }}
{{- end }}
{{- end }}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: apps/v1
16+
kind: Deployment
17+
metadata:
18+
name: {{ include "tgi.fullname" . }}
19+
labels:
20+
{{- include "tgi.labels" . | nindent 4 }}
21+
spec:
22+
replicas: {{ .Values.deploy.replicaCount }}
23+
selector:
24+
matchLabels:
25+
{{- include "tgi.selectorLabels" . | nindent 6 }}
26+
template:
27+
metadata:
28+
labels:
29+
{{- include "tgi.selectorLabels" . | nindent 8 }}
30+
spec:
31+
hostIPC: true
32+
containers:
33+
- name: {{ .Chart.Name }}
34+
args:
35+
- '-p'
36+
- {{ .Values.service.port | quote }}
37+
- '--cuda-graphs=0'
38+
envFrom:
39+
{{- if eq .Values.deploy.configMap.enabled true }}
40+
- configMapRef:
41+
name: {{ .Values.deploy.configMap.name }}
42+
{{- end }}
43+
- secretRef:
44+
name: {{ .Release.Name }}-hf-token
45+
# env:
46+
# - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
47+
# value: /data/numba_cache
48+
image: {{ .Values.deploy.image }}
49+
livenessProbe:
50+
failureThreshold: 10
51+
initialDelaySeconds: 5
52+
periodSeconds: 5
53+
tcpSocket:
54+
port: http
55+
readinessProbe:
56+
initialDelaySeconds: 5
57+
periodSeconds: 5
58+
tcpSocket:
59+
port: http
60+
startupProbe:
61+
failureThreshold: 120
62+
initialDelaySeconds: 20
63+
periodSeconds: 5
64+
tcpSocket:
65+
port: http
66+
ports:
67+
- name: http
68+
containerPort: {{ .Values.service.port }}
69+
protocol: TCP
70+
resources:
71+
{{- toYaml .Values.deploy.resources | nindent 12 }}
72+
securityContext:
73+
{{ toYaml .Values.securityContext | nindent 12 }}
74+
volumeMounts:
75+
- mountPath: /dev/shm
76+
name: dshm
77+
- mountPath: /data
78+
name: hf-data
79+
- mountPath: /tmp
80+
name: tmp
81+
volumes:
82+
- name: dshm
83+
emptyDir:
84+
medium: Memory
85+
- name: hf-data
86+
persistentVolumeClaim:
87+
claimName: {{ .Values.pvc.name }}
88+
- name: tmp
89+
emptyDir: {}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{ if .Values.ingress.enabled -}}
{{- /*
Ingress for the chart's Service; rendered only when .Values.ingress.enabled is
truthy.

The opening "if" above deliberately has no left-hand trim marker. This file
begins with a "#" license header, and every action between that header and the
apiVersion line trims surrounding whitespace. With a left trim on the first
action as well, the newline after the header is removed and the rendered
"apiVersion: ..." text is appended to the last comment line, leaving the
manifest without an apiVersion.

The apiVersion and the backend/pathType/ingressClassName fields are selected
from the cluster version reported in .Capabilities.KubeVersion.GitVersion.
*/ -}}
{{- $fullName := include "tgi.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- /* Before 1.18 there is no spec.ingressClassName; fall back to the class annotation unless the user already set it. */ -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
  {{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    {{- include "tgi.labels" . | nindent 4 }}
  {{- /* The annotations block (including the fixed allow-http entry) is emitted only when at least one annotation is configured. */}}
  {{- with .Values.ingress.annotations }}
  annotations:
    kubernetes.io/ingress.allow-http: "false"
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
            pathType: {{ .pathType }}
            {{- end }}
            backend:
              {{- /* networking.k8s.io/v1 uses the nested service/port form; older APIs use serviceName/servicePort. */}}
              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
{{- end }}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{ if .Values.pvc.create -}}
{{- /*
PersistentVolumeClaim named by .Values.pvc.name; rendered only when
.Values.pvc.create is truthy.

The opening "if" above has no left-hand trim marker so that the "---" document
separator stays on its own line instead of being appended to the "#" license
header that precedes this template.

storageClassName handling: the chart README lists the string "nil" as the
default for pvc.storageClassName. That string is truthy, so a plain "if" would
render "storageClassName: nil" and request a StorageClass literally named
"nil". The placeholder is therefore treated the same as an unset value, which
omits the field. Any other value is emitted quoted so it is always read as a
string.
*/ -}}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Values.pvc.name }}
  labels:
    {{- include "tgi.labels" . | nindent 4 }}
spec:
  {{- $storageClass := .Values.pvc.storageClassName }}
  {{- if and $storageClass (ne (toString $storageClass) "nil") }}
  storageClassName: {{ $storageClass | quote }}
  {{- end }}
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.pvc.size }}
{{- end }}

0 commit comments

Comments
 (0)