3 files changed: +59 −4 lines changed

New GitHub Actions workflow (all lines added):
+name: Publish vLLM XPU images
+
+on:
+  # NOTE(sd109): Since this is checking out an external
+  # repository, it's probably safer to leave this as workflow dispatch
+  # only so that we can manually build images from specific
+  # refs rather than automatically pulling in the latest
+  # content from the remote repo.
+  workflow_dispatch:
+    inputs:
+      vllm_ref:
+        type: string
+        description: The vLLM GitHub ref (tag, branch or commit) to build.
+        required: true
+
+jobs:
+  build_push_xpu_image:
+    name: Build and push image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write # needed for signing the images with GitHub OIDC Token
+      packages: write # required for pushing container images
+      security-events: write # required for pushing SARIF files
+    steps:
+      - name: Check out the vLLM repository
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm_ref }}
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push image
+        run: |
+          IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
+          docker build -f docker/Dockerfile.xpu -t $IMAGE --shm-size=4g .
+          docker push $IMAGE
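Since the workflow is dispatch-only, a build has to be kicked off by hand. A minimal sketch, assuming the GitHub CLI and a checkout of this repository; the ref value simply reuses the chart's default vLLM version:

  # Select the workflow by the display name defined above and pass the ref input.
  gh workflow run "Publish vLLM XPU images" -f vllm_ref=v0.8.5.post1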
Helm deployment template (container image selection):

 spec:
   containers:
     - name: {{ .Release.Name }}-api
-      {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
-      image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
+      {{- if eq (.Values.api.gpus | int) 0 }}
+      image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
+      {{- else if .Values.api.intelXPUsEnabled }}
+      image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
+      {{- else }}
+      image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
+      {{- end }}
       ports:
         - name: api
           containerPort: 8000
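A quick way to sanity-check the new three-way selection is to render the chart locally and inspect which image comes out; the release name and chart path below are placeholders, not part of this diff:

  # Render with Intel XPUs enabled and confirm the XPU image is chosen.
  helm template test-release ./chart \
    --set api.gpus=1 \
    --set api.intelXPUsEnabled=true \
    | grep 'image:'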
...
         periodSeconds: 10
       resources:
         limits:
+          {{- if .Values.api.intelXPUsEnabled }}
+          gpu.intel.com/i915: {{ .Values.api.gpus | int }}
+          {{- else }}
           nvidia.com/gpu: {{ .Values.api.gpus | int }}
+          {{- end }}
   volumes:
     - name: data
       {{- .Values.api.cacheVolume | toYaml | nindent 10 }}
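The gpu.intel.com/i915 resource name is the one advertised by Intel's GPU device plugin, so this path assumes that plugin is already deployed on the cluster. One way to confirm a node exposes the resource (node name is a placeholder):

  # Print the allocatable Intel GPU count; dots in the resource
  # name must be escaped inside the jsonpath expression.
  kubectl get node <node-name> \
    -o jsonpath='{.status.allocatable.gpu\.intel\.com/i915}'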
Chart values file:

   enabled: true
   # Container image config
   image:
-    # Defaults to vllm/vllm-openai when api.gpus > 0
-    # or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0
+    # Defaults to vllm/vllm-openai when api.gpus > 0,
+    # ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
+    # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
     repository:
     version: v0.8.5.post1
   monitoring:
...
   # distributed / multi-GPU support should be available, though it
   # has not been tested against this app.
   gpus: 1
+  # Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
+  intelXPUsEnabled: false
   # The update strategy to use for the deployment
   # See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
   # NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
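Putting the pieces together, an install that targets Intel GPUs might look like the following; the release name and chart reference are placeholders:

  helm upgrade --install vllm <chart-ref> \
    --set api.gpus=1 \
    --set api.intelXPUsEnabled=true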