Commit 335f6bf

Merge pull request #102 from stackhpc/feat/xpu-support
Added Intel XPU support
2 parents fd3f75e + ee1ded9

File tree

3 files changed: 59 additions & 4 deletions

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+name: Publish vLLM XPU images
+
+on:
+  # NOTE(sd109): Since this is checking out an external
+  # repo, it's probably safer to leave this as workflow dispatch
+  # only so that we can manually build images from specific
+  # refs rather than automatically pulling in the latest
+  # content from the remote repo.
+  workflow_dispatch:
+    inputs:
+      vllm_ref:
+        type: string
+        description: The vLLM GitHub ref (tag, branch or commit) to build.
+        required: true
+
+jobs:
+  build_push_xpu_image:
+    name: Build and push image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write # needed for signing the images with GitHub OIDC Token
+      packages: write # required for pushing container images
+      security-events: write # required for pushing SARIF files
+    steps:
+      - name: Check out the vLLM repository
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm_ref }}
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push image
+        run: |
+          IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
+          docker build -f docker/Dockerfile.xpu -t $IMAGE --shm-size=4g .
+          docker push $IMAGE
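
Because the workflow is dispatch-only, image builds are started by hand. A minimal sketch of triggering it with the GitHub CLI, assuming the workflow lives in the stackhpc/azimuth-llm repository (the repository name and example tag are illustrative, not confirmed by this diff):

# Kick off a build of a specific vLLM ref
gh workflow run "Publish vLLM XPU images" \
  --repo stackhpc/azimuth-llm \
  -f vllm_ref=v0.8.5.post1

# Check the status of the resulting run
gh run list --repo stackhpc/azimuth-llm --workflow "Publish vLLM XPU images"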

charts/azimuth-llm/templates/api/deployment.yml

Lines changed: 11 additions & 2 deletions
@@ -19,8 +19,13 @@ spec:
     spec:
       containers:
         - name: {{ .Release.Name }}-api
-          {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
-          image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
+          {{- if eq (.Values.api.gpus | int) 0 }}
+          image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
+          {{- else if .Values.api.intelXPUsEnabled }}
+          image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
+          {{- else }}
+          image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
+          {{- end }}
           ports:
             - name: api
               containerPort: 8000
@@ -61,7 +66,11 @@ spec:
             periodSeconds: 10
           resources:
            limits:
+              {{- if .Values.api.intelXPUsEnabled }}
+              gpu.intel.com/i915: {{ .Values.api.gpus | int }}
+              {{- else }}
               nvidia.com/gpu: {{ .Values.api.gpus | int }}
+              {{- end }}
      volumes:
        - name: data
          {{- .Values.api.cacheVolume | toYaml | nindent 10 }}
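
The new branching can be sanity-checked by rendering the chart locally. A quick sketch with helm template, run from a checkout of the chart repo (the release name my-llm is arbitrary):

helm template my-llm ./charts/azimuth-llm \
  --set api.gpus=1 \
  --set api.intelXPUsEnabled=true \
  --show-only templates/api/deployment.yml

With those values the rendered container should pick the ghcr.io/stackhpc/vllm-xpu image and a gpu.intel.com/i915 resource limit in place of nvidia.com/gpu.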

charts/azimuth-llm/values.yaml

Lines changed: 5 additions & 2 deletions
@@ -33,8 +33,9 @@ api:
   enabled: true
   # Container image config
   image:
-    # Defaults to vllm/vllm-openai when api.gpus > 0
-    # or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0
+    # Defaults to vllm/vllm-openai when api.gpus > 0,
+    # ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
+    # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
     repository:
     version: v0.8.5.post1
   monitoring:
@@ -80,6 +81,8 @@ api:
   # distributed / multi-GPU support should be available, though it
   # has not been tested against this app.
   gpus: 1
+  # Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
+  intelXPUsEnabled: false
   # The update strategy to use for the deployment
   # See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
   # NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
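
To consume an image produced by the new workflow, api.image.version has to match the vllm_ref that was passed to the build, since both end up as the image tag. A hedged example deployment (release name and tag are illustrative):

helm upgrade --install my-llm ./charts/azimuth-llm \
  --set api.gpus=1 \
  --set api.intelXPUsEnabled=true \
  --set api.image.version=v0.8.5.post1

Note that intelXPUsEnabled only has an effect when api.gpus > 0; with api.gpus == 0 the CPU image is selected regardless.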
