forked from vllm-project/vllm-omni
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.cpu.ubi
More file actions
103 lines (84 loc) · 3.31 KB
/
Dockerfile.cpu.ubi
File metadata and controls
103 lines (84 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# syntax=docker/dockerfile:1
# check=skip=InvalidDefaultArgInFrom # defaults are specified in bakefiles
# vim: ft=dockerfile
# Build arguments consumed by the FROM line below. They deliberately have no
# defaults here (values come from the bake files), which is why the
# InvalidDefaultArgInFrom build check is skipped above.
ARG BASE_UBI_IMAGE_TAG
ARG PYTHON_VERSION
## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
# ARG declared before FROM is only visible in FROM lines; re-declare it for
# use inside this stage and persist it for child stages.
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}
WORKDIR /workspace
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# Apply security updates and install everything this stage needs in a single
# layer, so `microdnf clean all` removes the repo metadata in the same layer
# that created it (cleanup in a later layer would not shrink the image).
# NOTE(review): on UBI9 the C++ compiler is normally packaged as `gcc-c++`,
# not `g++` — confirm `microdnf install g++` actually resolves in this repo set.
RUN microdnf -y update && microdnf install -y \
    # Python toolchain
    python${PYTHON_VERSION}-pip \
    python${PYTHON_VERSION}-wheel \
    # general-purpose tools
    which procps findutils tar \
    # required for vllm cpu
    numactl-libs \
    gcc \
    g++ \
    && microdnf clean all
## System Dependencies (multimedia support for vllm-omni) #####################
FROM base AS system-deps
# Install EPEL and multimedia packages (ffmpeg, sox)
# Order matters: the EPEL release RPM must be installed first so the second
# install can resolve packages from the EPEL repos; all three steps share one
# layer so `clean all` removes the metadata generated by both installs.
# NOTE(review): EPEL 9 ships `ffmpeg-free` rather than `ffmpeg` — confirm that
# plain `ffmpeg` resolves with this build's repository configuration.
RUN microdnf install -y \
https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \
&& microdnf install -y ffmpeg sox \
&& microdnf clean all
## Python Installer ############################################################
FROM system-deps AS python-install
ARG PYTHON_VERSION
# uv resolution settings: add the PyTorch CPU wheel index alongside the
# default index. These are ENV (not ARG) so they persist into the release
# stage, where the payload script performs the actual install against the
# uv cache mount.
ENV UV_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
ENV UV_INDEX_STRATEGY=unsafe-best-match
# All Python tooling lives in a dedicated virtualenv that is put first on PATH,
# so bare `pip`/`python` below operate inside the venv.
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHON_VERSION=${PYTHON_VERSION}
# python${PYTHON_VERSION}-devel — presumably needed to compile any
# sdist-only/native-extension wheels; confirm against the payload's deps.
# NOTE(review): pip/wheel/uv are installed unpinned — consider pinning
# versions for reproducible builds.
RUN microdnf install -y \
python${PYTHON_VERSION}-devel && \
python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
pip install --no-cache -U pip wheel uv && \
microdnf clean all
## Release #####################################################################
FROM python-install AS vllm-omni-openai
ARG PYTHON_VERSION
WORKDIR /workspace
# The venv settings are inherited from python-install; re-declared here so the
# release stage is self-describing.
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# License shipped under /licenses (Red Hat container certification convention).
COPY LICENSE /licenses/vllm.md
# Prompt/chat templates, also reachable via the conventional app-root path.
COPY examples/*.jinja /app/data/template/
RUN mkdir -p /opt/app-root && \
    ln -s /app/data/template /opt/app-root/template
# install vllm + vllm-omni (and dependencies) by running the payload script
# (bind-mounted so the payload itself never becomes an image layer; the uv
# cache mount speeds up rebuilds without bloating the image)
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=payload,target=/workspace/payload \
    ./payload/run.sh
# VLLM_TARGET_DEVICE=cpu is required to force CPU platform
# All writable caches are pointed at /tmp so the container runs read-only
# except for tmp and the home directory.
ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    VLLM_USAGE_STATS_SERVER=https://console.redhat.com/api/rhaiis-stats \
    VLLM_USAGE_SOURCE=quay-docker-image \
    OUTLINES_CACHE_DIR=/tmp/outlines \
    NUMBA_CACHE_DIR=/tmp/numba \
    TRITON_CACHE_DIR=/tmp/triton \
    VLLM_CACHE_ROOT=/tmp/vllm \
    VLLM_TARGET_DEVICE=cpu
# setup non-root user for OpenShift: fixed numeric UID, primary group 0, and a
# group-writable home so the image also works when the platform injects an
# arbitrary UID that only shares gid 0.
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /home/vllm && \
    chmod g+rwx /home/vllm
# pre-download tiktoken/harmony tokenizer for disconnected environments
# openai_harmony uses TIKTOKEN_RS_CACHE_DIR
# while the python tiktoken module uses TIKTOKEN_CACHE_DIR
ENV TIKTOKEN_CACHE_DIR=${HOME}/.cache/tiktoken
ENV TIKTOKEN_RS_CACHE_DIR=${TIKTOKEN_CACHE_DIR}
# This RUN executes as root, so the downloaded cache must be made
# group-accessible recursively (files inside, not just the top directory) for
# the gid-0 runtime user; `X` keeps plain files non-executable.
RUN --mount=type=bind,src=tools/predownload_tiktoken_tokenizers.py,target=predownload_tiktoken_tokenizers.py \
    python predownload_tiktoken_tokenizers.py && \
    chmod -R g+rwX "${TIKTOKEN_CACHE_DIR}"
USER 2000
# Disable core dumps for interactive shells started in the container.
RUN echo "ulimit -c 0" >> /home/vllm/.bashrc
WORKDIR /home/vllm
# Documentation only (does not publish the port): vllm serve listens on 8000
# by default.
EXPOSE 8000
# vllm-omni uses vllm serve with --omni flag (matches upstream pattern)
CMD ["vllm", "serve", "--omni"]