Skip to content

Commit 0379da4

Browse files
committed
Fix Docling install when extending base image
1 parent a04dfa3 commit 0379da4

1 file changed

Lines changed: 41 additions & 48 deletions

File tree

backend/Dockerfile

Lines changed: 41 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,30 @@
11
# =============================================================================
22
# Backend Dockerfile - FastAPI Application (Two-Image Strategy)
33
# =============================================================================
4-
# Supports two build modes:
5-
# 1. Full build: FROM python:3.11-slim-bookworm (CI base + local dev)
6-
# 2. Extension build: FROM ghcr.io/.../backend:base (homelab with Docling)
7-
#
8-
# CI builds base image (no Docling) -> pushes to GHCR
9-
# Homelab builds FROM base + adds Docling layer
4+
# Build modes:
5+
# 1. CI (base): USE_DOCLING=false -> backend:base with core deps
6+
# 2. Homelab: FROM backend:base + USE_DOCLING=true -> installs Docling
7+
# 3. Local dev: USE_DOCLING=true -> full build with everything
108
# =============================================================================
119

12-
# Base image ARG - allows building FROM a pre-built base image
1310
ARG BASE_IMAGE=python:3.11-slim-bookworm
1411

12+
# =============================================================================
13+
# Builder stage - installs Python packages with pip --user (production stage copies /root/.local from here)
14+
# =============================================================================
1515
FROM python:3.11-slim-bookworm AS builder
1616

17-
# Build arg to control Docling installation (default: true for local builds)
18-
# Set USE_DOCLING=false in CI to save disk space
1917
ARG USE_DOCLING=true
2018

2119
WORKDIR /app
2220

23-
# Install build dependencies
2421
RUN apt-get update && apt-get install -y --no-install-recommends \
25-
build-essential \
26-
gcc \
27-
g++ \
22+
build-essential gcc g++ \
2823
&& rm -rf /var/lib/apt/lists/*
2924

30-
# Copy requirements
3125
COPY requirements.txt .
3226

33-
# =============================================================================
34-
# Layer 1: Core dependencies
35-
# No cache mounts - prevents disk space issues on CI runners
36-
# =============================================================================
27+
# Core dependencies
3728
RUN pip install --user --no-cache-dir --prefer-binary \
3829
fastapi==0.115.0 \
3930
uvicorn[standard]==0.32.0 \
@@ -62,11 +53,7 @@ RUN pip install --user --no-cache-dir --prefer-binary \
6253
langfuse>=2.0.0 \
6354
psycopg2-binary>=2.9.9
6455

65-
# =============================================================================
66-
# Layer 2: Docling dependencies (optional, ~8-10 min, ~2GB disk)
67-
# Only installed if USE_DOCLING=true
68-
# No cache - HuggingFace models downloaded at runtime, not build time
69-
# =============================================================================
56+
# Docling deps (only if USE_DOCLING=true)
7057
RUN if [ "$USE_DOCLING" = "true" ]; then \
7158
pip install --user --no-cache-dir --prefer-binary \
7259
transformers>=4.47.0 \
@@ -75,59 +62,65 @@ RUN if [ "$USE_DOCLING" = "true" ]; then \
7562
pip install --user --no-cache-dir --force-reinstall "huggingface-hub>=0.24.0,<1.0" && \
7663
pip uninstall -y opencv-python 2>/dev/null || true && \
7764
pip install --user --no-cache-dir opencv-python-headless; \
78-
else \
79-
echo "Skipping Docling install (USE_DOCLING=$USE_DOCLING)"; \
8065
fi
8166

8267
# =============================================================================
83-
# Production stage - uses BASE_IMAGE (python:slim for CI, or ghcr backend:base for homelab)
68+
# Production stage
8469
# =============================================================================
8570
ARG BASE_IMAGE
86-
FROM ${BASE_IMAGE}
71+
FROM ${BASE_IMAGE} AS production
8772

88-
# Re-declare build args for production stage
8973
ARG USE_DOCLING=true
9074

9175
WORKDIR /app
9276

93-
# Install runtime dependencies
94-
# - Skip if building FROM base image (already has curl installed)
95-
# - Add tesseract only if Docling enabled
96-
RUN if [ ! -f /app/.base-marker ]; then \
97-
apt-get update && apt-get install -y --no-install-recommends curl && \
98-
rm -rf /var/lib/apt/lists/*; \
99-
fi && \
100-
if [ "$USE_DOCLING" = "true" ]; then \
77+
# Runtime deps
78+
RUN apt-get update && apt-get install -y --no-install-recommends \
79+
curl build-essential gcc g++ \
80+
&& rm -rf /var/lib/apt/lists/*
81+
82+
# Tesseract for Docling OCR
83+
RUN if [ "$USE_DOCLING" = "true" ]; then \
10184
apt-get update && apt-get install -y --no-install-recommends \
102-
tesseract-ocr \
103-
tesseract-ocr-eng && \
104-
rm -rf /var/lib/apt/lists/*; \
85+
tesseract-ocr tesseract-ocr-eng \
86+
&& rm -rf /var/lib/apt/lists/*; \
10587
fi
10688

107-
# Copy Python dependencies from builder
89+
# Copy packages from builder into /root/.local (runs for every BASE_IMAGE, including backend:base)
10890
COPY --from=builder /root/.local /root/.local
10991
ENV PATH=/root/.local/bin:$PATH
11092

111-
# Set Python environment variables
93+
# Install Docling deps whenever USE_DOCLING=true (needed when BASE_IMAGE is the Docling-free backend:base)
94+
# Specifiers are quoted so sh does not parse ">" as a redirect; only the opencv-python uninstall may fail silently
95+
RUN if [ "$USE_DOCLING" = "true" ]; then \
96+
echo "Installing Docling packages..." && \
97+
pip install --no-cache-dir --prefer-binary \
98+
"transformers>=4.47.0" \
99+
"docling>=2.14.0" \
100+
"docling-core>=2.4.0" && \
101+
pip install --no-cache-dir --force-reinstall "huggingface-hub>=0.24.0,<1.0" && \
102+
(pip uninstall -y opencv-python 2>/dev/null || true) && \
103+
pip install --no-cache-dir opencv-python-headless && \
104+
echo "Docling installed successfully"; \
105+
fi
106+
107+
# Clean up build tools to reduce image size
108+
RUN apt-get purge -y build-essential gcc g++ \
109+
&& apt-get autoremove -y \
110+
&& rm -rf /var/lib/apt/lists/*
111+
112112
ENV PYTHONDONTWRITEBYTECODE=1 \
113113
PYTHONUNBUFFERED=1 \
114114
HF_HOME=/app/data/huggingface \
115115
TRANSFORMERS_CACHE=/app/data/huggingface
116116

117-
# Copy application code
118117
COPY app/ ./app/
119-
120-
# Copy Alembic for database migrations
121118
COPY alembic.ini ./
122119
COPY alembic/ ./alembic/
123120

124-
# Create directories for data persistence
125121
RUN mkdir -p /app/data/chroma_db /app/data/documents /app/logs /app/data/huggingface \
126122
&& chmod -R 755 /app/data /app/logs
127123

128-
# Mark as base image (used to detect if we're extending from base)
129-
RUN touch /app/.base-marker
130-
131124
EXPOSE 8000
132125

133126
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \

0 commit comments

Comments
 (0)