Commit 7e035f4

[XPU] Add XPU Dockerfile and related docs
Signed-off-by: Yan Ma <yan.ma@intel.com>
1 parent: 47b11c6

4 files changed: +100 −1 lines changed

docker/Dockerfile.xpu

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+ARG BASE_IMAGE=gar-registry.caas.intel.com/pytorch/pytorch-ipex-spr:multi-bmg_release_2601_py2.10_ww2605.5
+FROM ${BASE_IMAGE} AS final
+
+ARG COMMON_WORKDIR=/workspace
+
+WORKDIR ${COMMON_WORKDIR}
+
+RUN mkdir -p ${COMMON_WORKDIR}/vllm-omni
+
+COPY . ${COMMON_WORKDIR}/vllm-omni
+
+RUN cd ${COMMON_WORKDIR}/vllm-omni && python -m pip install --no-cache-dir ".[dev]" --no-build-isolation
+
+# Fix Triton: replace the bundled builds with the XPU-specific wheel
+RUN --mount=type=cache,target=/root/.cache/pip pip uninstall triton triton-xpu -y && pip install triton-xpu==3.6.0 --extra-index-url=https://download.pytorch.org/whl/test/xpu
+
+RUN ln -sf /usr/bin/python3 /usr/bin/python
+
+CMD ["/bin/bash"]
+
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+
+ENTRYPOINT []
+
+# Set entrypoint for vllm-openai official images
+FROM final AS vllm-openai
+RUN cd ${COMMON_WORKDIR}/vllm-omni
+ENTRYPOINT ["vllm", "serve", "--omni"]
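The Dockerfile above is multi-stage: `final` is a general-purpose image that ends with `ENTRYPOINT []` and drops into a shell, while `vllm-openai` layers a `vllm serve --omni` entrypoint on top of it. As a minimal sketch (the image tags here are illustrative, not from the commit), either stage can be selected at build time with `--target`:

```bash
# Development image: stops at the `final` stage (CMD is /bin/bash)
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.xpu \
  --target final -t vllm-omni-xpu:dev .

# Serving image: the `vllm-openai` stage, entrypoint `vllm serve --omni`
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.xpu \
  --target vllm-openai -t vllm-omni-xpu:serve .
```

Without `--target`, BuildKit builds the last stage in the file, i.e. `vllm-openai`.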

docs/getting_started/installation/README.md

Lines changed: 1 addition & 0 deletions
@@ -5,4 +5,5 @@ vLLM-Omni supports the following hardware platforms:
 - [GPU](gpu.md)
     - [NVIDIA CUDA](gpu.md)
     - [AMD ROCm](gpu.md)
+    - [Intel XPU](gpu.md)
 - [NPU](npu.md)

docs/getting_started/installation/gpu.md

Lines changed: 21 additions & 1 deletion
@@ -18,6 +18,10 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
 
     --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:requirements"
 
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:requirements"
+
 ## Set up using Python
 
 ### Create a new Python environment
@@ -30,11 +34,14 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
 
     --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:pre-built-wheels"
 
-
 === "AMD ROCm"
 
     --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:pre-built-wheels"
 
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:pre-built-wheels"
+
 [](){ #build-from-source }
 
 ### Build wheel from source
@@ -47,6 +54,10 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
 
     --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:build-wheel-from-source"
 
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:build-wheel-from-source"
+
 ## Set up using Docker
 
 ### Pre-built images
@@ -59,8 +70,17 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr
 
     --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:pre-built-images"
 
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:pre-built-images"
+
 ### Build your own docker image
 
 === "AMD ROCm"
 
     --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:build-docker"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:build-docker"
+
docs/getting_started/installation/gpu/xpu.inc.md

Lines changed: 50 additions & 0 deletions

@@ -0,0 +1,50 @@
+# --8<-- [start:requirements]
+
+- GPU: Validated on Intel® Arc™ B-Series (it should also work on the Intel GPUs supported by vLLM.)
+
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
+
+vLLM-Omni currently recommends setting up through the Docker images described below.
+
+# --8<-- [start:pre-built-wheels]
+
+# --8<-- [end:pre-built-wheels]
+
+# --8<-- [start:build-wheel-from-source]
+
+# --8<-- [end:build-wheel-from-source]
+
+# --8<-- [start:build-docker]
+
+#### Build docker image
+
+```bash
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.xpu -t vllm-omni-xpu --shm-size=4g .
+```
+
+#### Launch the docker image
+
+##### Launch with OpenAI API Server
+
+```bash
+docker run -it -d --shm-size 10g \
+  --name {container_name} \
+  --net=host \
+  --ipc=host \
+  --privileged \
+  -v /dev/dri/by-path:/dev/dri/by-path \
+  --device /dev/dri:/dev/dri \
+  -v ~/.cache/huggingface:/root/.cache/huggingface \
+  --env "HF_TOKEN=$HF_TOKEN" \
+  -p 8091:8091 \
+  vllm-omni-xpu \
+  --model Qwen/Qwen2.5-Omni-3B --port 8091
+```
+
+# --8<-- [end:build-docker]
+
+# --8<-- [start:pre-built-images]
+
+# --8<-- [end:pre-built-images]
+
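Once the container is running, the server can be queried like any vLLM OpenAI-compatible endpoint. A hedged example (the request shape assumes vLLM's standard `/v1/chat/completions` route; the prompt is illustrative):

```bash
# Wait for the model to finish loading, then send a chat completion request
curl http://localhost:8091/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "Qwen/Qwen2.5-Omni-3B",
        "messages": [{"role": "user", "content": "Describe what an XPU is in one sentence."}]
      }'
```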
