11## Minimal runtime Dockerfile (microdnf-only, no torch, no Python wrapper)
2- # Go dependencies stage: download go modules and extract kv-cache
3- FROM quay.io/projectquay/golang:1.24 AS go-deps
2+ ## Simplified EPP Dockerfile - UDS tokenizer only (no vLLM, no embedded tokenizer)
3+ ## This build uses the default kv-cache pool (UDS-only, no embedded_tokenizers build tag)
4+ ## Tokenization is handled by a separate UDS tokenizer sidecar container
5+ ##
6+ ## CGO is still required for ZMQ (kvevents) but Python/vLLM dependencies are removed
7+ # Go build stage: compiles the EPP binary with CGO enabled (CGO is needed for ZMQ)
8+ FROM quay.io/projectquay/golang:1.25 AS go-builder
9+
10+ ARG TARGETOS
11+ ARG TARGETARCH
412
513WORKDIR /workspace
614
15+ # Install the ZeroMQ development files and pkg-config (required by the CGO build)
16+ # The builder is based on UBI8, so the EPEL 8 release package is installed first (presumably the source of zeromq-devel - confirm)
17+ RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
18+ dnf install -y zeromq-devel pkgconfig && \
19+ dnf clean all
20+
721# Copy the Go Modules manifests
822COPY go.mod go.mod
923COPY go.sum go.sum
@@ -14,98 +28,26 @@ COPY pkg/ pkg/
1428
1529RUN go mod download
1630
17- # Copy Python wrapper and requirements from llm-d-kv-cache dependency
18- # Extract version dynamically and copy to a known location
19- RUN KV_CACHE_PKG=$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache) && \
20- mkdir -p /workspace/kv-cache && \
21- cp -r $KV_CACHE_PKG/* /workspace/kv-cache
22-
23- FROM python:3.12-slim AS python-builder
24-
25- ARG TARGETARCH
26-
27- COPY --from=go-deps /workspace/kv-cache /workspace/kv-cache
28- WORKDIR /workspace/kv-cache
29-
30- # Create venv and install vLLM based on architecture using pre-built wheels
31- RUN python3.12 -m venv /workspace/kv-cache/build/venv && \
32- . /workspace/kv-cache/build/venv/bin/activate && \
33- pip install --upgrade pip && \
34- VLLM_VERSION="0.14.0" && \
35- if [ "$TARGETARCH" = "arm64" ]; then \
36- pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
37- elif [ "$TARGETARCH" = "amd64" ]; then \
38- pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
39- else \
40- echo "ERROR: Unsupported architecture: $TARGETARCH. Only arm64 and amd64 are supported." && exit 1; \
41- fi
42-
43- # Go build stage
44- FROM quay.io/projectquay/golang:1.24 AS go-builder
45-
46- ARG TARGETOS
47- ARG TARGETARCH
48- ARG PYTHON_VERSION=3.12
49- ENV PYTHON=python${PYTHON_VERSION}
50-
51- # Install build tools
52- # The builder is based on UBI8, so we need epel-release-8.
53- # ${PYTHON}-devel needed for CGO compilation (Python headers and ${PYTHON}-config for linker flags)
54- RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
55- dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig ${PYTHON}-devel ${PYTHON}-pip git && \
56- dnf clean all
57-
58- COPY --from=go-deps /workspace /workspace
59- COPY --from=go-deps /go/pkg/mod /go/pkg/mod
60-
61- WORKDIR /workspace
62-
63- COPY Makefile* ./
64-
65- COPY --from=python-builder /workspace/kv-cache/pkg/preprocessing/chat_completions /workspace/kv-cache/pkg/preprocessing/chat_completions
66- RUN make setup-venv
67- COPY --from=python-builder /workspace/kv-cache/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages
68-
69- ENV PYTHONPATH=/workspace/kv-cache/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages
70- RUN python3.12 -c "import tokenizer_wrapper" # verify tokenizer_wrapper is correctly installed
71-
72- ARG RELEASE_VERSION=v1.22.1
73- RUN TOKENIZER_VERSION=${RELEASE_VERSION} make build-epp
31+ # Build the EPP binary for the TARGETOS/TARGETARCH platform; CGO is enabled for ZMQ only (no Python, no embedded tokenizer)
32+ # No build tags are passed, so the default kv-cache build with the UDS tokenizer is used (//go:build !embedded_tokenizers)
33+ RUN CGO_ENABLED=1 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o bin/epp cmd/epp/main.go
7434
7535# Runtime stage
7636# Use UBI 9 minimal as the base image that packages the EPP binary
7737# Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details
7838FROM registry.access.redhat.com/ubi9/ubi-minimal:9.7
79- ARG PYTHON_VERSION=3.12
80- WORKDIR /
81- COPY --from=go-builder /workspace/bin/epp /app/epp
8239
83- USER root
40+ WORKDIR /
8441
85- ENV PYTHON=python${PYTHON_VERSION}
86- # Install zeromq runtime library and Python runtime needed by the manager.
87- # The final image is UBI9, so we need epel-release-9.
88- # Using microdnf for minimal image size
42+ # Install ZMQ runtime library only (no Python needed); curl -f fails the step on an HTTP error instead of saving the error page as the RPM
8943RUN curl -fsSL -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
9044 rpm -i /tmp/epel-release.rpm && \
9145 rm /tmp/epel-release.rpm && \
92- microdnf install -y --setopt=install_weak_deps=0 zeromq ${PYTHON} ${PYTHON}-libs ${PYTHON}-pip && \
46+ microdnf install -y --setopt=install_weak_deps=0 zeromq && \
9347 microdnf clean all && \
94- rm -rf /var/cache/yum /var/lib/yum && \
95- # Note: ${PYTHON} package does not automatically create python3/python symlinks - they must be created manually
96- ln -sf /usr/bin/${PYTHON} /usr/bin/python3 && \
97- ln -sf /usr/bin/${PYTHON} /usr/bin/python
48+ rm -rf /var/cache/yum /var/lib/yum
9849
99- # Copy Python kv-cache package and site-packages from the python-builder stage
100- COPY --from=python-builder /workspace/kv-cache /workspace/kv-cache
101- ENV PYTHONPATH=/workspace/kv-cache/pkg/preprocessing/chat_completions:/workspace/kv-cache/build/venv/lib/python3.12/site-packages
102- RUN ${PYTHON} -c "import tokenizer_wrapper" # verify tokenizer_wrapper is correctly installed
103-
104- ENV HF_HOME="/tmp/.cache"
105- # used by kv-cache-manager
106- ENV LOCAL_TOKENIZER_DIR="/tmp/.cache"
107- # Create cache directory and set permissions for non-root user
108- RUN mkdir -p /tmp/.cache && chown -R 65532:65532 ${HF_HOME}
50+ COPY --from=go-builder /workspace/bin/epp /app/epp
10951
11052USER 65532:65532
11153
0 commit comments