# =============================================================================
# Backend Dockerfile - FastAPI Application (Two-Image Strategy)
# =============================================================================
# Build modes:
#   1. CI (base):  USE_DOCLING=false -> backend:base with core deps
#   2. Homelab:    FROM backend:base + USE_DOCLING=true -> installs Docling
#   3. Local dev:  USE_DOCLING=true -> full build with everything
#
# CI builds the base image (no Docling) from python:3.11-slim-bookworm and
# pushes it to GHCR; the homelab build extends that base image
# (ghcr.io/.../backend:base) and adds the Docling layer.
# =============================================================================
119
# Base image for the production stage. The default gives a fresh build on the
# slim Python image; pass --build-arg BASE_IMAGE=<image> to build FROM a
# pre-built base image instead.
ARG BASE_IMAGE=python:3.11-slim-bookworm

# =============================================================================
# Builder stage - compiles Python packages (only for fresh builds)
# =============================================================================
FROM python:3.11-slim-bookworm AS builder

# Controls Docling installation (default: true for local builds).
# Set USE_DOCLING=false in CI to save disk space.
ARG USE_DOCLING=true

WORKDIR /app

# Build dependencies for the pip installs in this stage. The apt package lists
# are removed in the same layer so they never land in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        g++ \
        gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
COPY requirements.txt .
3226
33- # =============================================================================
34- # Layer 1: Core dependencies
35- # No cache mounts - prevents disk space issues on CI runners
36- # =============================================================================
27+ # Core dependencies
3728RUN pip install --user --no-cache-dir --prefer-binary \
3829 fastapi==0.115.0 \
3930 uvicorn[standard]==0.32.0 \
@@ -62,11 +53,7 @@ RUN pip install --user --no-cache-dir --prefer-binary \
6253 langfuse>=2.0.0 \
6354 psycopg2-binary>=2.9.9
6455
65- # =============================================================================
66- # Layer 2: Docling dependencies (optional, ~8-10 min, ~2GB disk)
67- # Only installed if USE_DOCLING=true
68- # No cache - HuggingFace models downloaded at runtime, not build time
69- # =============================================================================
56+ # Docling deps (only if USE_DOCLING=true)
7057RUN if [ "$USE_DOCLING" = "true" ]; then \
7158 pip install --user --no-cache-dir --prefer-binary \
7259 transformers>=4.47.0 \
@@ -75,59 +62,65 @@ RUN if [ "$USE_DOCLING" = "true" ]; then \
7562 pip install --user --no-cache-dir --force-reinstall "huggingface-hub>=0.24.0,<1.0" && \
7663 pip uninstall -y opencv-python 2>/dev/null || true && \
7764 pip install --user --no-cache-dir opencv-python-headless; \
78- else \
79- echo "Skipping Docling install (USE_DOCLING=$USE_DOCLING)" ; \
8065 fi
8166
# =============================================================================
# Production stage
# =============================================================================
# NOTE(review): FROM resolves BASE_IMAGE from the global ARG declared before
# the first FROM. This redeclaration is scoped to the preceding stage and is
# kept only to avoid changing the file's structure.
ARG BASE_IMAGE
FROM ${BASE_IMAGE} AS production

# Re-declared because ARG values do not carry over from one stage to the next.
ARG USE_DOCLING=true

WORKDIR /app

# Runtime deps
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Tesseract for Docling OCR
RUN if [ "$USE_DOCLING" = "true" ]; then \
        apt-get update && apt-get install -y --no-install-recommends \
            tesseract-ocr \
            tesseract-ocr-eng \
        && rm -rf /var/lib/apt/lists/*; \
    fi

# Copy packages from builder (for fresh builds from python:3.11-slim)
COPY --from=builder /root/.local /root/.local
ENV PATH=/root/.local/bin:$PATH

# If extending from base image AND Docling requested, install Docling deps.
# The base image already has core deps in /root/.local, we just add Docling.
#
# - The compiler toolchain is installed and purged inside this single RUN:
#   purging it in a later layer would not shrink the image, because the layer
#   that installed it would still be shipped.
# - Version specifiers are quoted; unquoted, the shell parses ">=4.47.0" as an
#   output redirection and pip receives only the bare package name.
# - `set -e` with `;` separators is used instead of one long `&&` chain so that
#   `|| true` excuses only the opencv-python uninstall, not every step before it.
# NOTE(review): the builder stage appears to install the same Docling packages
# when USE_DOCLING=true - confirm whether this second install is still needed.
RUN if [ "$USE_DOCLING" = "true" ]; then \
        set -e; \
        apt-get update; \
        apt-get install -y --no-install-recommends \
            build-essential \
            g++ \
            gcc; \
        echo "Installing Docling packages..."; \
        pip install --no-cache-dir --prefer-binary \
            "transformers>=4.47.0" \
            "docling>=2.14.0" \
            "docling-core>=2.4.0"; \
        pip install --no-cache-dir --force-reinstall "huggingface-hub>=0.24.0,<1.0"; \
        pip uninstall -y opencv-python 2>/dev/null || true; \
        pip install --no-cache-dir opencv-python-headless; \
        apt-get purge -y build-essential gcc g++; \
        apt-get autoremove -y; \
        rm -rf /var/lib/apt/lists/*; \
        echo "Docling installed successfully"; \
    fi
111+
# Python runtime flags; HF_HOME and TRANSFORMERS_CACHE both point at the
# huggingface directory created under /app/data below.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/app/data/huggingface \
    TRANSFORMERS_CACHE=/app/data/huggingface

# Copy application code
COPY app/ ./app/

# Copy Alembic for database migrations
COPY alembic.ini ./
COPY alembic/ ./alembic/

# Create directories for data persistence
RUN mkdir -p /app/data/chroma_db /app/data/documents /app/logs /app/data/huggingface \
    && chmod -R 755 /app/data /app/logs

EXPOSE 8000
132125
133126HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
0 commit comments