Skip to content

Commit 43b6539

Browse files
committed
Make Docling optional to fix CI disk space
1 parent 121b487 commit 43b6539

3 files changed

Lines changed: 39 additions & 14 deletions

File tree

.github/workflows/deploy.yml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,12 @@ jobs:
3838
3939
- name: Build backend image
4040
run: |
41-
docker build -t ghcr.io/${{ github.repository_owner }}/portfolio-backend:latest ./backend
41+
# USE_DOCLING=false to avoid disk space issues on GitHub runners
42+
# The app falls back to PyPDF/python-docx for document processing
43+
docker build \
44+
--build-arg USE_DOCLING=false \
45+
-t ghcr.io/${{ github.repository_owner }}/portfolio-backend:latest \
46+
./backend
4247
4348
- name: Push frontend image
4449
run: |

backend/Dockerfile

Lines changed: 27 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,18 @@
11
# =============================================================================
22
# Backend Dockerfile - FastAPI Application
33
# =============================================================================
4-
# Two-stage build with Docling support (optional via USE_DOCLING env var)
5-
# First build takes ~10-15 minutes, subsequent builds use cache (~2-3 min)
4+
# Two-stage build with Docling support (optional via USE_DOCLING build arg)
5+
# Set USE_DOCLING=false to skip Docling install (saves ~2GB disk, uses PyPDF fallback)
66
# =============================================================================
77

88
FROM python:3.11-slim-bookworm AS builder
99

10+
# Build arg to control Docling installation (default: false for smaller images)
11+
ARG USE_DOCLING=false
12+
1013
WORKDIR /app
1114

12-
# Install build dependencies (including those needed for Docling)
15+
# Install build dependencies
1316
RUN apt-get update && apt-get install -y --no-install-recommends \
1417
build-essential \
1518
gcc \
@@ -52,29 +55,40 @@ RUN --mount=type=cache,target=/root/.cache/pip \
5255
psycopg2-binary>=2.9.9
5356

5457
# =============================================================================
55-
# Layer 2: Docling dependencies (slow, ~8-10 min, cached after first build)
58+
# Layer 2: Docling dependencies (optional, ~8-10 min, ~2GB disk)
59+
# Only installed if USE_DOCLING=true
5660
# =============================================================================
5761
RUN --mount=type=cache,target=/root/.cache/pip \
5862
--mount=type=cache,target=/root/.cache/huggingface \
59-
pip install --user --prefer-binary \
60-
transformers>=4.47.0 \
61-
docling>=2.14.0 \
62-
docling-core>=2.4.0
63+
if [ "$USE_DOCLING" = "true" ]; then \
64+
pip install --user --prefer-binary \
65+
"transformers>=4.47.0" \
66+
"docling>=2.14.0" \
67+
"docling-core>=2.4.0"; \
68+
else \
69+
echo "Skipping Docling install (USE_DOCLING=$USE_DOCLING)"; \
70+
fi
6371

6472
# =============================================================================
6573
# Production stage
6674
# =============================================================================
6775
FROM python:3.11-slim-bookworm
6876

77+
# Re-declare build arg for production stage
78+
ARG USE_DOCLING=false
79+
6980
WORKDIR /app
7081

71-
# Install runtime dependencies (Tesseract for Docling OCR, curl for health check)
82+
# Install runtime dependencies (curl for health check; Tesseract, libgl1 and libglib2.0-0 only if Docling is enabled)
7283
RUN apt-get update && apt-get install -y --no-install-recommends \
7384
curl \
74-
tesseract-ocr \
75-
tesseract-ocr-eng \
76-
libgl1 \
77-
libglib2.0-0 \
85+
&& if [ "$USE_DOCLING" = "true" ]; then \
86+
apt-get install -y --no-install-recommends \
87+
tesseract-ocr \
88+
tesseract-ocr-eng \
89+
libgl1 \
90+
libglib2.0-0; \
91+
fi \
7892
&& rm -rf /var/lib/apt/lists/*
7993

8094
# Copy Python dependencies from builder

docker-compose.yml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,10 @@ services:
1212
build:
1313
context: ./backend
1414
dockerfile: Dockerfile
15+
args:
16+
# Set USE_DOCLING=true for advanced PDF processing with Docling
17+
# Set USE_DOCLING=false for faster builds (uses PyPDF fallback)
18+
- USE_DOCLING=${USE_DOCLING:-false}
1519
container_name: portfolio-backend
1620
restart: unless-stopped
1721
ports:
@@ -251,6 +255,8 @@ services:
251255
build:
252256
context: ./backend
253257
dockerfile: Dockerfile
258+
args:
259+
- USE_DOCLING=${USE_DOCLING:-false}
254260
container_name: portfolio-worker
255261
command: celery -A app.tasks.celery_app worker --loglevel=info
256262
environment:

0 commit comments

Comments
 (0)