3 files changed: +59 −4 lines changed

New GitHub Actions workflow (all lines added):
+name: Publish vLLM XPU images
+
+on:
+  # NOTE(sd109): Since this is checking out an external
+  # repository, it's probably safer to leave this as workflow dispatch
+  # only so that we can manually build images from specific
+  # refs rather than automatically pulling in the latest
+  # content from the remote repo.
+  workflow_dispatch:
+    inputs:
+      vllm_ref:
+        type: string
+        description: The vLLM GitHub ref (tag, branch or commit) to build.
+        required: true
+
+jobs:
+  build_push_xpu_image:
+    name: Build and push image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write # needed for signing the images with GitHub OIDC Token
+      packages: write # required for pushing container images
+      security-events: write # required for pushing SARIF files
+    steps:
+      - name: Check out the vLLM repository
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm_ref }}
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push image
+        run: |
+          IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
+          docker build -f docker/Dockerfile.xpu -t $IMAGE --shm-size=4g .
+          docker push $IMAGE
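Since the workflow is dispatch-only, a build has to be kicked off by hand. A minimal sketch, assuming the GitHub CLI and a checkout of this repository; the ref value simply reuses the chart's default vLLM version:

  # Select the workflow by the display name defined above and pass the ref input.
  gh workflow run "Publish vLLM XPU images" -f vllm_ref=v0.8.5.post1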
Helm deployment template (container image selection):

 spec:
   containers:
     - name: {{ .Release.Name }}-api
-      {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
-      image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
+      {{- if eq (.Values.api.gpus | int) 0 }}
+      image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
+      {{- else if .Values.api.intelXPUsEnabled }}
+      image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
+      {{- else }}
+      image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
+      {{- end }}
       ports:
         - name: api
           containerPort: 8000
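A quick way to sanity-check the new three-way selection is to render the chart locally and inspect which image comes out; the release name and chart path below are placeholders, not part of this diff:

  # Render with Intel XPUs enabled and confirm the XPU image is chosen.
  helm template test-release ./chart \
    --set api.gpus=1 \
    --set api.intelXPUsEnabled=true \
    | grep 'image:'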
...
         periodSeconds: 10
       resources:
         limits:
+          {{- if .Values.api.intelXPUsEnabled }}
+          gpu.intel.com/i915: {{ .Values.api.gpus | int }}
+          {{- else }}
           nvidia.com/gpu: {{ .Values.api.gpus | int }}
+          {{- end }}
   volumes:
     - name: data
       {{- .Values.api.cacheVolume | toYaml | nindent 10 }}
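The gpu.intel.com/i915 resource name is the one advertised by Intel's GPU device plugin, so this path assumes that plugin is already deployed on the cluster. One way to confirm a node exposes the resource (node name is a placeholder):

  # Print the allocatable Intel GPU count; dots in the resource
  # name must be escaped inside the jsonpath expression.
  kubectl get node <node-name> \
    -o jsonpath='{.status.allocatable.gpu\.intel\.com/i915}'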
Chart values file:

   enabled: true
   # Container image config
   image:
-    # Defaults to vllm/vllm-openai when api.gpus > 0
-    # or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0
+    # Defaults to vllm/vllm-openai when api.gpus > 0,
+    # ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
+    # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
     repository:
     version: v0.8.5.post1
   monitoring:
...
   # distributed / multi-GPU support should be available, though it
   # has not been tested against this app.
   gpus: 1
+  # Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
+  intelXPUsEnabled: false
   # The update strategy to use for the deployment
   # See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
   # NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
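Putting the pieces together, an install that targets Intel GPUs might look like the following; the release name and chart reference are placeholders:

  helm upgrade --install vllm <chart-ref> \
    --set api.gpus=1 \
    --set api.intelXPUsEnabled=true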