Skip to content

Commit 03d9418

Browse files
committed
Add benchmarking folder with common config set ups
1 parent cbb8928 commit 03d9418

File tree

11 files changed

+1053
-18
lines changed

11 files changed

+1053
-18
lines changed

benchmarking/benchmark-values.yaml

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Settings for the Kubernetes Job that runs inference-perf.
job:
  image:
    repository: quay.io/inference-perf/inference-perf
    # If tag is set to "", the Job template falls back to .Chart.AppVersion.
    tag: "latest"
  # Leave empty to omit the field and use the namespace's default service account.
  serviceAccountName: ""
  nodeSelector: {}
  # Example resources:
  # resources:
  #   requests:
  #     cpu: "1"
  #     memory: "4Gi"
  #   limits:
  #     cpu: "2"
  #     memory: "8Gi"
  resources: {}

# Passed to inference-perf as the value of --log-level.
logLevel: INFO

# A GCS bucket path that points to the dataset file.
# The file will be copied from this path to the local file system
# at /dataset/dataset.json for use during the run.
# NOTE: For this dataset to be used, config.data.path must also be
# explicitly set to /dataset/dataset.json.
gcsPath: ""

# hfToken optionally creates a secret with the specified token.
# Can be set using: helm install --set hfToken=<token>
# (the key is case-sensitive).
hfToken: ""

# Rendered verbatim into the ConfigMap as config.yml and handed to
# inference-perf via --config_file.
config:
  load:
    type: constant
    interval: 15
    stages:
      - rate: 10
        duration: 20
      - rate: 20
        duration: 20
      - rate: 30
        duration: 20
  api:
    type: completion
    streaming: true
  server:
    type: vllm
    model_name: meta-llama/Llama-3.1-8B-Instruct
    # NOTE(review): 0.0.0.0 looks like a placeholder — confirm it is meant to
    # be replaced with the address of the model server under test.
    base_url: http://0.0.0.0:8000
    ignore_eos: true
  tokenizer:
    pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  data:
    type: shareGPT
  metrics:
    type: prometheus
    prometheus:
      google_managed: true
  report:
    request_lifecycle:
      summary: true
      per_stage: true
      per_request: true
    prometheus:
      summary: true
      per_stage: true

benchmarking/benchmark.ipynb

Lines changed: 596 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/bin/bash

# Recursively downloads benchmark results from a GCS bucket folder into a
# local output directory next to this script.
#
# Usage:
#   <script> <GCS_BUCKET> [GCS_FOLDER_PATH]
#
# Arguments:
#   GCS_BUCKET       Bucket name (without the gs:// prefix). Required.
#   GCS_FOLDER_PATH  Path inside the bucket. Defaults to benchmark_results/.
#
# Environment variables (all optional):
#   benchmark_id  Identifies what the benchmark is for. It decides the filepath
#                 to save the results, which later is used by the jupyter
#                 notebook to assign the benchmark_id as data labels for
#                 plotting. Defaults to "inference-extension".
#   run_id        Groups different runs of the same benchmarks for comparison.
#                 Defaults to "default-run".
#   output_dir    Directory (relative to this script) that holds all results.
#                 Defaults to "output".
#
# Exits non-zero if the bucket argument is missing or the download fails.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so errors are never silently ignored.
set -euo pipefail

# Check if GCS_BUCKET is provided as an argument.
# "${1:-}" keeps this check safe under `set -u` when no argument is passed.
if [ -z "${1:-}" ]; then
  echo "Usage: $0 <GCS_BUCKET> [GCS_FOLDER_PATH:DEFAULT=benchmark_results/]" >&2
  exit 1
fi

GCS_BUCKET="$1"
GCS_FOLDER_PATH="${2:-benchmark_results/}" # Default to benchmark_results/ if not provided

# Env vars to be passed when calling this script (see header for details).
benchmark_id=${benchmark_id:-"inference-extension"}
run_id=${run_id:-"default-run"}
output_dir=${output_dir:-'output'}

# Resolve the directory containing this script so results land beside it
# regardless of the caller's working directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
benchmark_output_dir="${SCRIPT_DIR}/${output_dir}/${run_id}/${benchmark_id}"

echo "Creating output directory: ${benchmark_output_dir}/results/json/"
mkdir -p "${benchmark_output_dir}/results/json/"

echo "Downloading gs://${GCS_BUCKET}/${GCS_FOLDER_PATH} to ${benchmark_output_dir}/results/json/"
# Only report success when gsutil actually succeeded.
if ! gsutil cp -r "gs://${GCS_BUCKET}/${GCS_FOLDER_PATH}" "${benchmark_output_dir}/results/json/"; then
  echo "Download failed." >&2
  exit 1
fi

echo "Download complete."
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.

# macOS Finder metadata
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Chart metadata for the inference-perf benchmarking chart.
apiVersion: v2
name: inference-perf
description: A Helm chart for running inference-perf benchmarking tool
type: application
# Version of the chart itself; used in the helm.sh/chart label.
version: 0.2.0
# Used as the container image tag when job.image.tag is empty, and as the
# app.kubernetes.io/version label.
appVersion: "0.2.0"
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
## 🚀 Deploying `inference-perf` via Helm Chart
2+
3+
This guide explains how to deploy `inference-perf` to a Kubernetes cluster with Helm.
4+
5+
Note: This is a temporary chart, added until a remote chart is available.
6+
7+
---
8+
9+
### 1. Prerequisites
10+
11+
Make sure you have the following tools installed and configured:
12+
13+
* **Kubernetes Cluster:** Access to a functional cluster (e.g., GKE).
14+
* **Helm:** The Helm CLI installed locally.
15+
16+
---
17+
18+
### 2. Configuration (`values.yaml`)
19+
20+
Before deployment, navigate to the **`deploy/inference-perf`** directory and edit the **`values.yaml`** file to customize your deployment and the benchmark parameters.
21+
22+
#### Optional Parameters
23+
24+
| Key | Description | Default |
25+
| :--- | :--- | :--- |
26+
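| `hfToken` | Hugging Face API token. If provided, a Kubernetes `Secret` named `<fullname>-hf-secret` (where `<fullname>` is the chart's computed full name) will be created and exposed to the benchmark container as the `HF_TOKEN` environment variable. | `""` |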
27+
| `job.serviceAccountName` | Standard Kubernetes `serviceAccountName`. If not provided, the default service account is used. | `""` |
28+
| `job.nodeSelector` | Standard Kubernetes `nodeSelector` map to constrain pod placement to nodes with matching labels. | `{}` |
29+
| `job.resources` | Standard Kubernetes resource requests and limits for the main `inference-perf` container. | `{}` |
30+
---
31+
32+
> **Example Resource Block:**
33+
> ```yaml
34+
> # resources:
35+
> # requests:
36+
> # cpu: "1"
37+
> # memory: "4Gi"
38+
> # limits:
39+
> # cpu: "2"
40+
> # memory: "8Gi"
41+
> ```
42+
43+
#### GKE Specific Parameters
44+
45+
This section details the necessary configuration and permissions for using a Google Cloud Storage (GCS) path to manage your dataset, typical for deployments on GKE.
46+
47+
##### Required IAM Permissions
48+
49+
The identity executing the workload (e.g., the associated Kubernetes Service Account, often configured via **Workload Identity**) must possess the following IAM roles on the target GCS bucket for data transfer:
50+
51+
* **`roles/storage.objectViewer`** (Required to read/download the input dataset from GCS).
52+
* **`roles/storage.objectCreator`** (Required to write/push benchmark results back to GCS).
53+
54+
55+
| Key | Description | Default |
56+
| :--- | :--- | :--- |
57+
| `gcsPath` | A GCS URI pointing to the dataset file (e.g., `gs://my-bucket/dataset.json`). The file will be automatically copied to the running pod during initialization. | `""` |
58+
59+
---
60+
61+
### 3. Run Deployment
62+
63+
Use the **`helm install`** command from the **`deploy/inference-perf`** directory to deploy the chart.
64+
65+
* **Standard Install:** Deploy using the default `values.yaml`.
66+
```bash
67+
helm install test .
68+
```
69+
70+
* **Set `hfToken` Override:** Pass the Hugging Face token directly.
71+
```bash
72+
helm install test . --set hfToken="<TOKEN>"
73+
```
74+
75+
* **Custom Config Override:** Make changes to the values file for custom settings.
76+
```bash
77+
helm install test . -f values.yaml
78+
```
79+
80+
### 4. Cleanup
81+
82+
To remove the benchmark deployment, run:
83+
```bash
84+
helm uninstall test
85+
```
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
{{/*
Short name of the chart: .Values.nameOverride when set, otherwise the chart
name. Limited to 63 characters with any trailing "-" removed.
*/}}
{{- define "inference-perf.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified app name, truncated at 63 chars because some Kubernetes name
fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name.
  3. "<release name>-<chart name>" otherwise.
*/}}
{{- define "inference-perf.fullname" -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- $full := printf "%s-%s" .Release.Name $base -}}
{{- if .Values.fullnameOverride -}}
{{- $full = .Values.fullnameOverride -}}
{{- else if contains $base .Release.Name -}}
{{- $full = .Release.Name -}}
{{- end -}}
{{- $full | trunc 63 | trimSuffix "-" -}}
{{- end }}

{{/*
"<chart name>-<chart version>" as used by the helm.sh/chart label.
"+" is replaced with "_" and the result is limited to 63 characters.
*/}}
{{- define "inference-perf.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $label | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end }}

{{/*
Labels applied to every resource: chart label, selector labels, the app
version (only when the chart declares one) and the managing service.
*/}}
{{- define "inference-perf.labels" -}}
helm.sh/chart: {{ include "inference-perf.chart" . }}
{{ include "inference-perf.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Labels identifying the pods of this release.
*/}}
{{- define "inference-perf.selectorLabels" -}}
app.kubernetes.io/name: {{ include "inference-perf.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Name of the Secret holding the HuggingFace credentials.
*/}}
{{- define "inference-perf.hfSecret" -}}
{{- printf "%s-hf-secret" (include "inference-perf.fullname" .) -}}
{{- end -}}

{{/*
Key inside the HuggingFace Secret under which the token is stored.
*/}}
{{- define "inference-perf.hfKey" -}}
{{- printf "%s-hf-key" (include "inference-perf.fullname" .) -}}
{{- end -}}

{{/*
Directory where the config map is mounted in the benchmark container.
*/}}
{{- define "inference-perf.configMount" -}}
{{- "/cfg" -}}
{{- end -}}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# inference-perf/templates/configmap.yaml
#
# Publishes .Values.config as a single file, config.yml, for the benchmark Job.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ printf "%s-config" (include "inference-perf.fullname" .) }}
  labels:
    {{- include "inference-perf.labels" . | nindent 4 }}
data:
  config.yml: |
    {{- .Values.config | toYaml | nindent 4 }}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# inference-perf/templates/job.yaml
#
# One-shot Job that runs inference-perf with the config rendered into the
# "<fullname>-config" ConfigMap.
#
# Optional behaviour driven by values:
#   - gcsPath:  an init container copies the dataset to /dataset/dataset.json
#               on a shared emptyDir volume that the benchmark container also
#               mounts.
#   - hfToken:  HF_TOKEN is injected from the chart-managed Secret.
#   - job.serviceAccountName / job.nodeSelector: emitted only when set.
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "inference-perf.fullname" . }}-job
  labels:
    {{- include "inference-perf.labels" . | nindent 4 }}
    app: inference-perf
spec:
  template:
    metadata:
      labels:
        {{- include "inference-perf.selectorLabels" . | nindent 8 }}
        app: inference-perf
    spec:
      restartPolicy: Never
      {{- /* Omit the field entirely when unset so the default service account applies. */}}
      {{- with .Values.job.serviceAccountName }}
      serviceAccountName: {{ . | quote }}
      {{- end }}
      {{- with .Values.job.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if .Values.gcsPath }}
      initContainers:
        - name: fetch-dataset
          image: google/cloud-sdk:latest
          command: ["sh", "-c", "gsutil cp {{ .Values.gcsPath }} /dataset/dataset.json"]
          volumeMounts:
            - name: dataset-volume
              mountPath: /dataset
      {{- end }}
      containers:
        - name: inference-perf-container
          image: "{{ .Values.job.image.repository }}:{{ .Values.job.image.tag | default .Chart.AppVersion }}"
          command: ["inference-perf"]
          args:
            - "--config_file"
            - "{{ include "inference-perf.configMount" . }}/config.yml"
            - "--log-level"
            - {{ .Values.logLevel | quote }}
          {{- if .Values.hfToken }}
          env:
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: {{ include "inference-perf.hfSecret" . }}
                  key: {{ include "inference-perf.hfKey" . }}
          {{- end }}
          volumeMounts:
            - name: config-volume
              mountPath: {{ include "inference-perf.configMount" . }}
              readOnly: true
            {{- /* Same volume the init container wrote the dataset to. */}}
            {{- if .Values.gcsPath }}
            - name: dataset-volume
              mountPath: /dataset
            {{- end }}
          resources:
            {{- toYaml .Values.job.resources | nindent 12 }}
      volumes:
        - name: config-volume
          configMap:
            name: {{ include "inference-perf.fullname" . }}-config
        {{- /* Scratch volume shared between fetch-dataset and the benchmark container. */}}
        {{- if .Values.gcsPath }}
        - name: dataset-volume
          emptyDir: {}
        {{- end }}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# inference-perf/templates/secret.yaml
#
# Rendered only when hfToken is set; stores the token under the chart's
# HuggingFace secret key.
{{- with .Values.hfToken }}
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "inference-perf.hfSecret" $ }}
  labels:
    {{- include "inference-perf.labels" $ | nindent 4 }}
type: Opaque
stringData:
  {{ include "inference-perf.hfKey" $ }}: {{ quote . }}
{{- end }}

0 commit comments

Comments
 (0)