-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
116 lines (88 loc) · 4.26 KB
/
Dockerfile.gpu
File metadata and controls
116 lines (88 loc) · 4.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# syntax=docker/dockerfile:1
# Canton MCP Server - GPU Dockerfile (SentenceTransformers + PyTorch CUDA)
# Uses the same structure as Dockerfile but installs GPU deps and pre-downloads
# the sentence-transformers model for CUDA-accelerated embeddings (~55x faster on T4).
#
# Build: docker compose -f docker-compose.yml -f docker-compose.mainnet.yml build
# Run: docker compose -f docker-compose.yml -f docker-compose.mainnet.yml up -d
FROM python:3.12-slim AS builder

# Optional Hugging Face token for the model pre-download below.
# Scoped as a build ARG only (not exported via ENV): ARGs are already visible
# to every RUN in this stage, and an ENV would persist the token into the
# stage's image config.
# NOTE(review): --build-arg values remain visible in `docker history` of this
# stage; if the token is sensitive, switch to
# `RUN --mount=type=secret,id=hf_token ...` so it never lands in any layer.
ARG HF_TOKEN=""

# Install git for cloning documentation repositories
RUN apt-get update && \
apt-get install -y --no-install-recommends git && \
rm -rf /var/lib/apt/lists/*

# Install uv
# NOTE(review): :latest is unpinned — consider pinning a uv release tag
# (or digest) for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Set working directory
WORKDIR /app

# Copy dependency files and README (needed by hatchling build)
COPY pyproject.toml uv.lock README.md ./

# Clone documentation repositories BEFORE copying src/ so source changes
# don't invalidate this expensive layer (~5 min git clone + indexing)
# NOTE(review): clones track each repo's default-branch HEAD, so this layer
# is not reproducible across rebuilds — pin a tag/commit if determinism matters.
RUN mkdir -p /app/docs && \
git clone --depth 1 https://github.com/digital-asset/daml.git /app/docs/daml && \
git clone --depth 1 https://github.com/digital-asset/canton.git /app/docs/canton && \
git clone --depth 1 https://github.com/digital-asset/daml-finance.git /app/docs/daml-finance

# Copy source code (needed for editable install)
COPY src/ ./src/

# Install dependencies with GPU extras
RUN uv sync --frozen --no-dev --extra gpu

# Install PyTorch with CUDA support (cu124 for CUDA 12.x compatibility)
# NOTE(review): torch version is unpinned — pin (e.g. torch==2.x.y) for
# reproducible builds.
RUN uv pip install torch --index-url https://download.pytorch.org/whl/cu124

# Pre-download sentence-transformers model (~90MB)
# At build time there's no GPU, so this just caches the model weights.
# At runtime with EMBEDDING_DEVICE=cuda, they load onto the GPU.
# HF_TOKEN is passed inline so it exists only in this command's environment,
# not in the stage's persisted ENV.
RUN HOME=/root HF_TOKEN=${HF_TOKEN} uv run python -c "\
from sentence_transformers import SentenceTransformer; \
SentenceTransformer('all-MiniLM-L6-v2') \
"

# No pre-indexing at build time — GPU indexes at startup via warmup task
# Final stage — CUDA runtime so PyTorch can actually use the GPU
# (python:3.12-slim has zero CUDA libs; PyTorch silently falls back to CPU)
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04

# Prevent interactive prompts (tzdata, etc.) during apt installs.
# ARG instead of ENV so DEBIAN_FRONTEND is build-time only and does not
# leak into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Install Python 3.12 from deadsnakes PPA + git
RUN apt-get update && \
apt-get install -y --no-install-recommends software-properties-common && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y --no-install-recommends \
python3.12 python3.12-venv python3.12-dev git ca-certificates && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \
update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
rm -rf /var/lib/apt/lists/*

# Install uv in final stage
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Create non-root user (fixed UID 1000 so volume/file ownership is predictable)
RUN useradd -m -u 1000 canton && \
mkdir -p /app && \
chown -R canton:canton /app

WORKDIR /app

# Copy installed dependencies from builder (includes GPU deps)
COPY --from=builder --chown=canton:canton /app/.venv /app/.venv

# Fix venv Python symlink: builder has Python at /usr/local/bin/python3,
# but this stage (nvidia/cuda + deadsnakes) has it at /usr/bin/python3.12
RUN ln -sf /usr/bin/python3.12 /app/.venv/bin/python

# Copy documentation repositories from builder
COPY --from=builder --chown=canton:canton /app/docs /app/docs

# Copy pre-downloaded sentence-transformers model (cached under huggingface hub)
COPY --from=builder --chown=canton:canton /root/.cache/huggingface /home/canton/.cache/huggingface

# Copy application code
COPY --chown=canton:canton pyproject.toml uv.lock README.md ./
COPY --chown=canton:canton src/ ./src/
COPY --chown=canton:canton schemas/ ./schemas/

# Switch to non-root user
USER canton

# Set Python path to use virtual environment.
# PYTHONPATH is /app/src exactly: the base CUDA image defines no PYTHONPATH,
# so appending ":$PYTHONPATH" would expand to a trailing colon — an empty
# entry that silently puts the current working directory on sys.path.
ENV PATH="/app/.venv/bin:$PATH"
ENV PYTHONPATH="/app/src"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Expose MCP server port (documentation only; publish via compose/run)
EXPOSE 7284

# Health check — cheap, reuses the venv Python so no curl/wget is needed.
# NOTE(review): start-period=5s may be short if the startup warmup/indexing
# task delays binding port 7284 — confirm and raise if containers flap on boot.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7284/health').read()" || exit 1

# Default command (exec form: server runs as PID 1 and receives SIGTERM)
CMD ["canton-mcp-server"]