Merged
63 changes: 63 additions & 0 deletions benchmarking/benchmark-values.yaml
@@ -0,0 +1,63 @@
job:
  image:
    repository: quay.io/inference-perf/inference-perf
    tag: "latest" # Defaults to .Chart.AppVersion
  serviceAccountName: ""
  nodeSelector: {}
  # Example resources:
  # resources:
  #   requests:
  #     cpu: "1"
  #     memory: "4Gi"
  #   limits:
  #     cpu: "2"
  #     memory: "8Gi"
  resources: {}

logLevel: INFO

# A GCS bucket path that points to the dataset file.
# The file will be copied from this path to the local file system
# at /dataset/dataset.json for use during the run.
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json.
gcsPath: ""

# hfToken optionally creates a secret with the specified token.
# Can be set using helm install --set hfToken=<token>
hfToken: ""

config:
  load:
    type: constant
    interval: 15
    stages:
    - rate: 10
      duration: 20
    - rate: 20
      duration: 20
    - rate: 30
      duration: 20
  api:
    type: completion
    streaming: true
  server:
    type: vllm
    model_name: meta-llama/Llama-3.1-8B-Instruct
    base_url: http://0.0.0.0:8000
    ignore_eos: true
  tokenizer:
    pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  data:
    type: shareGPT
  metrics:
    type: prometheus
    prometheus:
      google_managed: true
  report:
    request_lifecycle:
      summary: true
      per_stage: true
      per_request: true
    prometheus:
      summary: true
      per_stage: true
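As a quick sanity check on the staged load above, the total measured load time is just the sum of the stage durations; a small sketch (stage values copied from the `config.load.stages` block):

```python
# Stage values copied from the config above: three stages at rates 10, 20, 30 req/s.
stages = [
    {"rate": 10, "duration": 20},
    {"rate": 20, "duration": 20},
    {"rate": 30, "duration": 20},
]

# Total time the load generator spends in measured stages, in seconds.
total_duration = sum(s["duration"] for s in stages)
print(total_duration)  # 60
```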
596 changes: 596 additions & 0 deletions benchmarking/benchmark.ipynb

Large diffs are not rendered by default.

32 changes: 32 additions & 0 deletions benchmarking/download-gcs-results.bash
@@ -0,0 +1,32 @@
#!/bin/bash

# Downloads a file from a GCS bucket.

# Check if GCS_BUCKET is provided as an argument
if [ -z "$1" ]; then
  echo "Usage: $0 <GCS_BUCKET> [GCS_FOLDER_PATH:DEFAULT=benchmark_results]"
  exit 1
fi

GCS_BUCKET="$1"
GCS_FOLDER_PATH="${2:-benchmark_results/}" # Default to benchmark_results/ if not provided

# Env vars to be passed when calling this script.
# The id of the benchmark. This is needed to identify what the benchmark is for.
# It decides the filepath to save the results, which later is used by the jupyter notebook to assign
# the benchmark_id as data labels for plotting.
benchmark_id=${benchmark_id:-"inference-extension"}
# run_id can be used to group different runs of the same benchmarks for comparison.
run_id=${run_id:-"default-run"}
output_dir=${output_dir:-'output'}

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
benchmark_output_dir=${SCRIPT_DIR}/${output_dir}/${run_id}/${benchmark_id}

echo "Creating output directory: ${benchmark_output_dir}/results/json/"
mkdir -p "${benchmark_output_dir}/results/json/"

echo "Downloading gs://${GCS_BUCKET}/${GCS_FOLDER_PATH} to ${benchmark_output_dir}/results/json/"
gsutil cp -r "gs://${GCS_BUCKET}/${GCS_FOLDER_PATH}" "${benchmark_output_dir}/results/json/"

echo "Download complete."
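The `${var:-default}` expansions above mean that, with no environment variables set, results land under a predictable path; a minimal sketch of the same defaulting logic:

```shell
# Mirrors the script's defaulting: ${var:-default} substitutes the default
# only when the variable is unset or empty.
benchmark_id=${benchmark_id:-inference-extension}
run_id=${run_id:-default-run}
output_dir=${output_dir:-output}
echo "${output_dir}/${run_id}/${benchmark_id}/results/json/"
```

With no overrides this prints `output/default-run/inference-extension/results/json/`, which is where the Jupyter notebook later looks for results to label and plot.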
23 changes: 23 additions & 0 deletions benchmarking/inference-perf/.helmignore
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
6 changes: 6 additions & 0 deletions benchmarking/inference-perf/Chart.yaml
@@ -0,0 +1,6 @@
apiVersion: v2
name: inference-perf
description: A Helm chart for running inference-perf benchmarking tool
type: application
version: 0.2.0
appVersion: "0.2.0"
85 changes: 85 additions & 0 deletions benchmarking/inference-perf/README.md
@@ -0,0 +1,85 @@
## 🚀 Deploying `inference-perf` via Helm Chart

This guide explains how to deploy `inference-perf` to a Kubernetes cluster with Helm.

Note: This is a temporary chart, included here until a remote chart is published.

---

### 1. Prerequisites

Make sure you have the following tools installed and configured:

* **Kubernetes Cluster:** Access to a functional cluster (e.g., GKE).
* **Helm:** The Helm CLI installed locally.

---

### 2. Configuration (`values.yaml`)

Before deployment, navigate to the **`benchmarking/inference-perf`** directory and edit the **`values.yaml`** file to customize your deployment and the benchmark parameters.

#### Optional Parameters

| Key | Description | Default |
| :--- | :--- | :--- |
| `hfToken` | Hugging Face API token. If provided, a Kubernetes `Secret` holding the token (named `<fullname>-hf-secret`) will be created and used for authentication. | `""` |
| `serviceAccountName` | Standard Kubernetes `serviceAccountName`. If not provided, the default service account is used. | `""` |
| `nodeSelector` | Standard Kubernetes `nodeSelector` map to constrain pod placement to nodes with matching labels. | `{}` |
| `resources` | Standard Kubernetes resource requests and limits for the main `inference-perf` container. | `{}` |

---

> **Example Resource Block:**
> ```yaml
> resources:
>   requests:
>     cpu: "1"
>     memory: "4Gi"
>   limits:
>     cpu: "2"
>     memory: "8Gi"
> ```
#### GKE Specific Parameters

This section details the configuration and permissions required to use a Google Cloud Storage (GCS) path for your dataset, typical for deployments on GKE.

##### Required IAM Permissions

The identity executing the workload (e.g., the associated Kubernetes Service Account, often configured via **Workload Identity**) must have the following IAM roles on the target GCS bucket:

* **`roles/storage.objectViewer`** (required to read/download the input dataset from GCS).
* **`roles/storage.objectCreator`** (required to write/push benchmark results back to GCS).

| Key | Description | Default |
| :--- | :--- | :--- |
| `gcsPath` | A GCS URI pointing to the dataset file (e.g., `gs://my-bucket/dataset.json`). The file will be automatically copied to the running pod during initialization. | `""` |

---
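Per the note in `benchmark-values.yaml`, setting `gcsPath` only copies the file; the benchmark config must also point at the local copy. A minimal sketch of the two values together (bucket name hypothetical):

```yaml
gcsPath: "gs://my-bucket/dataset.json"  # hypothetical bucket; copied to /dataset/dataset.json at init
config:
  data:
    type: shareGPT
    path: /dataset/dataset.json  # must match the init container's destination
```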
### 3. Run Deployment

Use the **`helm install`** command from the **`benchmarking/inference-perf`** directory to deploy the chart.

* **Standard Install:** Deploy using the default `values.yaml`.

```bash
helm install test .
```

* **Set `hfToken` Override:** Pass the Hugging Face token directly.

```bash
helm install test . --set hfToken="<TOKEN>"
```

* **Custom Config Override:** Deploy with a customized values file.

```bash
helm install test . -f values.yaml
```
### 4. Cleanup

To remove the benchmark deployment:

```bash
helm uninstall test
```
72 changes: 72 additions & 0 deletions benchmarking/inference-perf/templates/_helpers.tpl
@@ -0,0 +1,72 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "inference-perf.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "inference-perf.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "inference-perf.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "inference-perf.labels" -}}
helm.sh/chart: {{ include "inference-perf.chart" . }}
{{ include "inference-perf.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "inference-perf.selectorLabels" -}}
app.kubernetes.io/name: {{ include "inference-perf.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Common Secret Name for HuggingFace credentials
*/}}
{{- define "inference-perf.hfSecret" -}}
{{ include "inference-perf.fullname" . }}-hf-secret
{{- end -}}

{{/*
Common Secret Key for HuggingFace credentials
*/}}
{{- define "inference-perf.hfKey" -}}
{{ include "inference-perf.fullname" . }}-hf-key
{{- end -}}

{{/*
Mount path for config map
*/}}
{{- define "inference-perf.configMount" -}}
/cfg
{{- end -}}
10 changes: 10 additions & 0 deletions benchmarking/inference-perf/templates/configmap.yaml
@@ -0,0 +1,10 @@
# inference-perf/templates/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "inference-perf.fullname" . }}-config
  labels:
    {{- include "inference-perf.labels" . | nindent 4 }}
data:
  config.yml: |
    {{- toYaml .Values.config | nindent 4 }}
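For orientation, with the default values and a hypothetical release named `test` (so `fullname` resolves to `test-inference-perf`), this template renders roughly as follows (labels and most config keys elided):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: test-inference-perf-config
data:
  config.yml: |
    load:
      type: constant
      ...
```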
57 changes: 57 additions & 0 deletions benchmarking/inference-perf/templates/job.yaml
@@ -0,0 +1,57 @@
# inference-perf/templates/job.yaml
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "inference-perf.fullname" . }}-job
  labels:
    {{- include "inference-perf.labels" . | nindent 4 }}
    app: inference-perf
spec:
  template:
    metadata:
      labels:
        {{- include "inference-perf.selectorLabels" . | nindent 8 }}
        app: inference-perf
    spec:
      restartPolicy: Never
      serviceAccountName: {{ .Values.job.serviceAccountName }}
      {{- with .Values.job.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if .Values.gcsPath }}
      initContainers:
      - name: fetch-dataset
        image: google/cloud-sdk:latest
        command: ["sh", "-c", "gsutil cp {{ .Values.gcsPath }} /dataset/dataset.json"]
        volumeMounts:
        - name: dataset-volume
          mountPath: /dataset
      {{- end }}
      containers:
      - name: inference-perf-container
        image: "{{ .Values.job.image.repository }}:{{ .Values.job.image.tag | default .Chart.AppVersion }}"
        command: ["inference-perf"]
        args:
        - "--config_file"
        - "{{ include "inference-perf.configMount" . }}/config.yml"
        - "--log-level"
        - "{{ .Values.logLevel }}"
        env:
        {{- if .Values.hfToken }}
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              name: {{ include "inference-perf.hfSecret" . }}
              key: {{ include "inference-perf.hfKey" . }}
        {{- end }}
        volumeMounts:
        - name: config-volume
          mountPath: {{ include "inference-perf.configMount" . }}
          readOnly: true
        {{- if .Values.gcsPath }}
        # Expose the fetched dataset to the main container at /dataset/dataset.json.
        - name: dataset-volume
          mountPath: /dataset
        {{- end }}
        resources:
          {{- toYaml .Values.job.resources | nindent 12 }}
      volumes:
      - name: config-volume
        configMap:
          name: {{ include "inference-perf.fullname" . }}-config
      {{- if .Values.gcsPath }}
      # Shared scratch volume: written by the fetch-dataset init container, read by the benchmark.
      - name: dataset-volume
        emptyDir: {}
      {{- end }}
@@ -0,0 +1,12 @@
# inference-perf/templates/secret.yaml
{{- if .Values.hfToken }}
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "inference-perf.hfSecret" . }}
  labels:
    {{- include "inference-perf.labels" . | nindent 4 }}
type: Opaque
stringData:
  {{ include "inference-perf.hfKey" . }}: {{ .Values.hfToken | quote }}
{{- end }}