|
1 | 1 | # ============================================================================= |
2 | 2 | # Backend Dockerfile - FastAPI Application |
3 | 3 | # ============================================================================= |
4 | | -# Two-stage build with Docling support (optional via USE_DOCLING env var) |
5 | | -# First build takes ~10-15 minutes, subsequent builds use cache (~2-3 min) |
| 4 | +# Two-stage build with Docling support (optional via USE_DOCLING build arg) |
| 5 | +# Docling is skipped by default (USE_DOCLING=false: saves ~2GB disk, uses PyPDF fallback); build with --build-arg USE_DOCLING=true to install it |
6 | 6 | # ============================================================================= |
7 | 7 |
|
8 | 8 | FROM python:3.11-slim-bookworm AS builder |
9 | 9 |
|
| 10 | +# Build arg to control Docling installation (default: false for smaller images) |
| 11 | +ARG USE_DOCLING=false |
| 12 | + |
10 | 13 | WORKDIR /app |
11 | 14 |
|
12 | | -# Install build dependencies (including those needed for Docling) |
| 15 | +# Install build dependencies |
13 | 16 | RUN apt-get update && apt-get install -y --no-install-recommends \ |
14 | 17 | build-essential \ |
15 | 18 | gcc \ |
@@ -52,29 +55,40 @@ RUN --mount=type=cache,target=/root/.cache/pip \ |
52 | 55 | psycopg2-binary>=2.9.9 |
53 | 56 |
|
54 | 57 | # ============================================================================= |
55 | | -# Layer 2: Docling dependencies (slow, ~8-10 min, cached after first build) |
| 58 | +# Layer 2: Docling dependencies (optional, ~8-10 min, ~2GB disk) |
| 59 | +# Only installed if USE_DOCLING=true; specifiers are quoted so sh does not parse ">=" as a redirect |
56 | 60 | # ============================================================================= |
57 | 61 | RUN --mount=type=cache,target=/root/.cache/pip \ |
58 | 62 | --mount=type=cache,target=/root/.cache/huggingface \ |
59 | | - pip install --user --prefer-binary \ |
60 | | - transformers>=4.47.0 \ |
61 | | - docling>=2.14.0 \ |
62 | | - docling-core>=2.4.0 |
| 63 | + if [ "$USE_DOCLING" = "true" ]; then \ |
| 64 | + pip install --user --prefer-binary \ |
| 65 | + "transformers>=4.47.0" \ |
| 66 | + "docling>=2.14.0" \ |
| 67 | + "docling-core>=2.4.0"; \ |
| 68 | + else \ |
| 69 | + echo "Skipping Docling install (USE_DOCLING=$USE_DOCLING)"; \ |
| 70 | + fi |
63 | 71 |
|
64 | 72 | # ============================================================================= |
65 | 73 | # Production stage |
66 | 74 | # ============================================================================= |
67 | 75 | FROM python:3.11-slim-bookworm |
68 | 76 |
|
| 77 | +# Re-declare build arg for production stage (an ARG from the builder stage is not visible after this FROM) |
| 78 | +ARG USE_DOCLING=false |
| 79 | + |
69 | 80 | WORKDIR /app |
70 | 81 |
|
71 | | -# Install runtime dependencies (Tesseract for Docling OCR, curl for health check) |
| 82 | +# Install runtime dependencies: curl always (health check); tesseract, libgl1 and libglib2.0-0 only when USE_DOCLING=true |
72 | 83 | RUN apt-get update && apt-get install -y --no-install-recommends \ |
73 | 84 | curl \ |
74 | | - tesseract-ocr \ |
75 | | - tesseract-ocr-eng \ |
76 | | - libgl1 \ |
77 | | - libglib2.0-0 \ |
| 85 | + && if [ "$USE_DOCLING" = "true" ]; then \ |
| 86 | + apt-get install -y --no-install-recommends \ |
| 87 | + tesseract-ocr \ |
| 88 | + tesseract-ocr-eng \ |
| 89 | + libgl1 \ |
| 90 | + libglib2.0-0; \ |
| 91 | + fi \ |
78 | 92 | && rm -rf /var/lib/apt/lists/* |
79 | 93 |
|
80 | 94 | # Copy Python dependencies from builder |
|
0 commit comments