-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
107 lines (85 loc) · 3.33 KB
/
Dockerfile
File metadata and controls
107 lines (85 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# =========================
# Builder stage: build toolchain + llama.cpp sources for the CUDA build
# =========================
FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-devel AS builder

# LLAMA_CPP_TAG: git ref of llama.cpp to check out after cloning.
# MAKE_JOBS: parallelism passed to `cmake --build -j` later in this stage.
ARG LLAMA_CPP_TAG=master
ARG MAKE_JOBS=8

ENV DEBIAN_FRONTEND=noninteractive \
    TZ=America/Los_Angeles \
    PIP_ROOT_USER_ACTION=ignore \
    HF_HUB_ENABLE_HF_TRANSFER=1

# Compiler toolchain and fetch utilities; the apt index is removed in the same
# layer so it is not kept in the resulting filesystem layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      git \
      ninja-build \
      pkg-config \
 && rm -rf /var/lib/apt/lists/*

WORKDIR /opt

# Clone into /opt/llama.cpp, refresh all remotes and tags, then switch to the
# requested ref.
RUN git clone https://github.com/ggerganov/llama.cpp.git \
 && git -C llama.cpp fetch --all --tags \
 && git -C llama.cpp checkout "${LLAMA_CPP_TAG}"
# Configure and compile llama.cpp with the CUDA backend as static binaries,
# then stage every produced executable in /opt/llama.bin.
#
# CUDA_ARCHITECTURES: optional value forwarded as CMAKE_CUDA_ARCHITECTURES
# (for example "all" or "86;89"). When left empty the flag is omitted and
# llama.cpp's own CMake logic picks the target architectures.
#
# The former -DGGML_CUDA_ARCHITECTURES=all is not an option listed in
# llama.cpp's build documentation, which selects GPU targets through
# CMAKE_CUDA_ARCHITECTURES instead.
# NOTE(review): confirm against the llama.cpp ref you pin; the keyword "all"
# additionally needs a CMake release that understands it (3.23 or newer).
ARG CUDA_ARCHITECTURES=""

WORKDIR /opt/llama.cpp

RUN cmake -S . -B build -G Ninja \
      -DCMAKE_BUILD_TYPE=Release \
      -DBUILD_SHARED_LIBS=OFF \
      -DGGML_STATIC=ON \
      -DGGML_CUDA=ON \
      -DGGML_CUDA_F16=ON \
      -DGGML_CUDA_DMMV_X=64 \
      -DGGML_CUDA_MMV_Y=1 \
      -DLLAMA_CURL=OFF \
      ${CUDA_ARCHITECTURES:+"-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}"} \
 && cmake --build build -j "${MAKE_JOBS}" \
 && mkdir -p /opt/llama.bin \
 && cp -v build/bin/* /opt/llama.bin/
# =========================
# swapbuild stage: Go 1.23 toolchain plus Node tooling for building llama-swap
# =========================
FROM golang:1.23-bookworm AS swapbuild

# CGO is disabled and -trimpath is applied to go invocations in this stage.
ENV CGO_ENABLED=0 \
    GOFLAGS="-trimpath"

WORKDIR /src

# Tools for the make-driven build; pnpm 9 is installed globally through npm
# after the apt index has been removed.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      git \
      make \
      nodejs \
      npm \
 && rm -rf /var/lib/apt/lists/* \
 && npm install --global pnpm@9
# Clone llama-swap into the current WORKDIR, build it with the project's
# Makefile, and copy one resulting executable to the fixed path
# /out/llama-swap.
#
# LLAMA_SWAP_REF: optional git ref to check out before building; empty
# (default) builds whatever the fresh clone has checked out.
ARG LLAMA_SWAP_REF=""

# `set -eux` comes first so a failing clone or make aborts the step right
# away; placed after an `&&` chain it is skipped on failure and the remaining
# `;`-separated commands would still run.
#
# Binary selection prefers an executable whose name mentions linux and the Go
# toolchain's GOARCH, then falls back to any executable named llama-swap*
# (sorted, so the pick is deterministic).
# NOTE(review): the `llama-swap*linux*<arch>*` pattern assumes the Makefile
# puts OS and architecture into the file name — confirm against upstream.
RUN set -eux; \
    git clone https://github.com/mostlygeek/llama-swap.git .; \
    if [ -n "${LLAMA_SWAP_REF:-}" ]; then \
      git fetch --all --tags; \
      git checkout "${LLAMA_SWAP_REF}"; \
    fi; \
    make clean all; \
    mkdir -p /out; \
    arch="$(go env GOARCH)"; \
    bin="$(find build -maxdepth 1 -type f -perm -111 -name "llama-swap*linux*${arch}*" | sort | head -n1)"; \
    if [ -z "$bin" ]; then \
      bin="$(find build -maxdepth 1 -type f -perm -111 -name 'llama-swap*' | sort | head -n1)"; \
    fi; \
    test -n "$bin"; \
    install -Dm755 "$bin" /out/llama-swap
# =========================
# Runtime: CUDA + llama.cpp bins + llama-swap
# =========================
FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-runtime

# Build-time only: keeps apt non-interactive for the install below without
# persisting DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

ENV TZ=America/Los_Angeles \
    PIP_ROOT_USER_ACTION=ignore \
    HF_HUB_ENABLE_HF_TRANSFER=1

# Runtime packages; tini and bash are used by the entry wrapper and curl by
# the healthcheck. The apt index is removed in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      bash \
      ca-certificates \
      curl \
      git \
      libgomp1 \
      tini \
      wget \
 && rm -rf /var/lib/apt/lists/*

# llama.cpp binaries staged by the builder stage
COPY --from=builder /opt/llama.bin/ /usr/local/bin/

# llama-swap (normalized path produced by the swapbuild stage)
COPY --from=swapbuild /out/llama-swap /usr/local/bin/llama-swap

# Working directory for configs/models/scripts
WORKDIR /project
# Run wrapper: requires a mounted /project/config.yaml.
# The script is written with mode 0755 via --chmod (no separate chmod layer)
# and forwards any extra container arguments to llama-swap.
COPY --chown=root:root --chmod=0755 <<'BASH' /project/scripts/run.sh
#!/usr/bin/env bash
# Entry wrapper for llama-swap: verifies the config file exists, then replaces
# this shell with tini running llama-swap.
set -euo pipefail

CONFIG="/project/config.yaml"
# Listen address; override by setting LISTEN_ADDR in the container environment.
: "${LISTEN_ADDR:=0.0.0.0:8080}"

if [[ ! -f "$CONFIG" ]]; then
  echo "ERROR: $CONFIG not found. Mount your config file, e.g.:"
  echo " docker run ... -v \$(pwd)/config.yaml:/project/config.yaml ..."
  exit 1
fi

# "$@" holds any arguments passed to this script (empty when none are given),
# so additional llama-swap flags can be supplied at `docker run` time.
exec /usr/bin/tini -- /usr/local/bin/llama-swap --config "$CONFIG" --listen "${LISTEN_ADDR}" "$@"
BASH
# Healthcheck for the UI endpoint. The port is taken from LISTEN_ADDR (the
# text after its last ':'), falling back to the run wrapper's default of
# 0.0.0.0:8080, so the probe keeps working when the listen port is overridden
# at run time. The probe always targets 127.0.0.1.
HEALTHCHECK --interval=30s --timeout=5s --retries=5 \
  CMD addr="${LISTEN_ADDR:-0.0.0.0:8080}"; curl -fsS "http://127.0.0.1:${addr##*:}/ui" >/dev/null || exit 1

# Documents the default listen port; EXPOSE itself does not publish it.
EXPOSE 8080

# NOTE(review): no USER is set in this stage, so the service runs as the base
# image's default user (presumably root) — consider a dedicated non-root user
# once ownership of mounted configs/models has been confirmed.
ENTRYPOINT ["/bin/bash", "/project/scripts/run.sh"]