-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
116 lines (88 loc) · 4.26 KB
/
Dockerfile.gpu
File metadata and controls
116 lines (88 loc) · 4.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# syntax=docker/dockerfile:1
# Canton MCP Server - GPU Dockerfile (SentenceTransformers + PyTorch CUDA)
# Uses the same structure as Dockerfile but installs GPU deps and pre-downloads
# the sentence-transformers model for CUDA-accelerated embeddings (~55x faster on T4).
#
# Build: docker compose -f docker-compose.yml -f docker-compose.mainnet.yml build
# Run: docker compose -f docker-compose.yml -f docker-compose.mainnet.yml up -d
FROM python:3.12-slim AS builder

# Optional Hugging Face token for the model pre-download below.
# Scoped as a build ARG only (not exported via ENV): ARGs are already visible
# to every RUN in this stage, and an ENV would persist the token into the
# stage's image config.
# NOTE(review): --build-arg values remain visible in `docker history` of this
# stage; if the token is sensitive, switch to
# `RUN --mount=type=secret,id=hf_token ...` so it never lands in any layer.
ARG HF_TOKEN=""

# Install git for cloning documentation repositories
RUN apt-get update && \
apt-get install -y --no-install-recommends git && \
rm -rf /var/lib/apt/lists/*

# Install uv
# NOTE(review): :latest is unpinned — consider pinning a uv release tag
# (or digest) for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Set working directory
WORKDIR /app

# Copy dependency files and README (needed by hatchling build)
COPY pyproject.toml uv.lock README.md ./

# Clone documentation repositories BEFORE copying src/ so source changes
# don't invalidate this expensive layer (~5 min git clone + indexing)
# NOTE(review): clones track each repo's default-branch HEAD, so this layer
# is not reproducible across rebuilds — pin a tag/commit if determinism matters.
RUN mkdir -p /app/docs && \
git clone --depth 1 https://github.com/digital-asset/daml.git /app/docs/daml && \
git clone --depth 1 https://github.com/digital-asset/canton.git /app/docs/canton && \
git clone --depth 1 https://github.com/digital-asset/daml-finance.git /app/docs/daml-finance

# Copy source code (needed for editable install)
COPY src/ ./src/

# Install dependencies with GPU extras
RUN uv sync --frozen --no-dev --extra gpu

# Install PyTorch with CUDA support (cu124 for CUDA 12.x compatibility)
# NOTE(review): torch version is unpinned — pin (e.g. torch==2.x.y) for
# reproducible builds.
RUN uv pip install torch --index-url https://download.pytorch.org/whl/cu124

# Pre-download sentence-transformers model (~90MB)
# At build time there's no GPU, so this just caches the model weights.
# At runtime with EMBEDDING_DEVICE=cuda, they load onto the GPU.
# HF_TOKEN is passed inline so it exists only in this command's environment,
# not in the stage's persisted ENV.
RUN HOME=/root HF_TOKEN=${HF_TOKEN} uv run python -c "\
from sentence_transformers import SentenceTransformer; \
SentenceTransformer('all-MiniLM-L6-v2') \
"

# No pre-indexing at build time — GPU indexes at startup via warmup task
# Final stage — CUDA runtime so PyTorch can actually use the GPU
# (python:3.12-slim has zero CUDA libs; PyTorch silently falls back to CPU)
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04

# Prevent interactive prompts (tzdata, etc.) during apt installs.
# ARG instead of ENV so DEBIAN_FRONTEND is build-time only and does not
# leak into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Install Python 3.12 from deadsnakes PPA + git
RUN apt-get update && \
apt-get install -y --no-install-recommends software-properties-common && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y --no-install-recommends \
python3.12 python3.12-venv python3.12-dev git ca-certificates && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \
update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
rm -rf /var/lib/apt/lists/*

# Install uv in final stage
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Create non-root user (fixed UID 1000 so volume/file ownership is predictable)
RUN useradd -m -u 1000 canton && \
mkdir -p /app && \
chown -R canton:canton /app

WORKDIR /app

# Copy installed dependencies from builder (includes GPU deps)
COPY --from=builder --chown=canton:canton /app/.venv /app/.venv

# Fix venv Python symlink: builder has Python at /usr/local/bin/python3,
# but this stage (nvidia/cuda + deadsnakes) has it at /usr/bin/python3.12
RUN ln -sf /usr/bin/python3.12 /app/.venv/bin/python

# Copy documentation repositories from builder
COPY --from=builder --chown=canton:canton /app/docs /app/docs

# Copy pre-downloaded sentence-transformers model (cached under huggingface hub)
COPY --from=builder --chown=canton:canton /root/.cache/huggingface /home/canton/.cache/huggingface

# Copy application code
COPY --chown=canton:canton pyproject.toml uv.lock README.md ./
COPY --chown=canton:canton src/ ./src/
COPY --chown=canton:canton schemas/ ./schemas/

# Switch to non-root user
USER canton

# Set Python path to use virtual environment.
# PYTHONPATH is /app/src exactly: the base CUDA image defines no PYTHONPATH,
# so appending ":$PYTHONPATH" would expand to a trailing colon — an empty
# entry that silently puts the current working directory on sys.path.
ENV PATH="/app/.venv/bin:$PATH"
ENV PYTHONPATH="/app/src"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Expose MCP server port (documentation only; publish via compose/run)
EXPOSE 7284

# Health check — cheap, reuses the venv Python so no curl/wget is needed.
# NOTE(review): start-period=5s may be short if the startup warmup/indexing
# task delays binding port 7284 — confirm and raise if containers flap on boot.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7284/health').read()" || exit 1

# Default command (exec form: server runs as PID 1 and receives SIGTERM)
CMD ["canton-mcp-server"]