-
Notifications
You must be signed in to change notification settings - Fork 33
Expand file tree
/
Copy pathDockerfile.worker
More file actions
92 lines (85 loc) · 4.58 KB
/
Dockerfile.worker
File metadata and controls
92 lines (85 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# DataPusher+ Prefect worker image.
#
# Base: ``ckan/ckan-dev:2.11`` (the same image the CKAN container uses
# in this stack) because DP+'s flow code does an import-time
# ``_bootstrap_ckan_app_context`` that calls ``ckan.make_app()`` — so the
# worker's Python env needs to have CKAN installed alongside Prefect.
# The earlier ``prefecthq/prefect:3-latest`` base shaved a couple
# hundred MB off the image but left flow runs crashing with
# ``ModuleNotFoundError: No module named 'ckan'`` the moment the worker
# subprocess tried to import the flow.
#
# Build from the repo root:
# docker build -f Dockerfile.worker -t datapusher-plus-worker:dev .
# ckan/ckan-dev:2.11 doesn't ship an arm64 manifest; pin to linux/amd64
# so Docker on Apple Silicon builds via emulation instead of failing
# with ``no match for platform in manifest``.
FROM --platform=linux/amd64 ckan/ckan-dev:2.11
# qsv release to install. Both values are build args, so they can be
# overridden with ``--build-arg`` without editing this file:
#   * QSV_VERSION — release tag; it is used both as a URL path segment
#     and inside the asset file name of the qsv download.
#   * QSV_RELEASE — base URL that the version and asset name are
#     appended to.
ARG QSV_VERSION=20.0.0
ARG QSV_RELEASE=https://github.com/dathere/qsv/releases/download
# Run the remaining build steps as root: the apt installs and the pip
# writes below need it. ckan-dev's default user is ``ckan``. The compose
# file also sets ``user: root`` on the worker service for the same
# reason (its bash command needs apt-get).
USER root
# System dependencies:
# * ca-certificates / curl / unzip — for the qsv download below.
# * gdal-bin / libgdal-dev — fiona builds from sdist and needs
# ``gdal-config`` + GDAL headers. Same package set CI installs
# (.github/workflows/main.yml).
# * libspatialindex-dev / libgeos-dev / libproj-dev — shapely /
# pyproj / rtree expect them when building from source.
# * build-essential — gcc/make for those builds.
#
# ckan-dev already ships gcc + libpq etc., but the geo libs aren't on
# the base image and we don't want to silently rely on what is.
# Optional SHA-256 of the qsv release zip. Empty (the default) skips
# verification so existing builds keep working unchanged; pass
# ``--build-arg QSV_SHA256=<digest>`` to make the build fail on a
# tampered or truncated download.
ARG QSV_SHA256=""
# Extra CLI tools installed alongside the build dependencies:
#   * uchardet / file — DP+ ValidationStage shells out to ``uchardet``
#     for charset detection and ``file`` for MIME-type sniffing — both
#     are plain CLI tools, not Python packages. Without them the flow
#     crashes at the FormatConverter stage with ``FileNotFoundError:
#     [Errno 2] No such file or directory: 'uchardet'``.
#   * b3sum — Issue #221: the Rust BLAKE3 CLI. DP+'s download stage
#     hashes via the Python ``blake3`` package (from requirements.txt);
#     ``b3sum`` itself is installed so operators / debugging sessions
#     can verify hashes ad-hoc against the same algorithm, and so test
#     scripts can compute reference digests without spinning up Python.
#
# qsv: MUSL build, not GNU — ckan/ckan-dev:2.11 (Debian bookworm) ships
# GLIBC 2.36, and qsv's GNU build requires GLIBC 2.38+. The MUSL build
# is statically linked and works on any Linux ABI. Only the ``qsvdp``
# binary is extracted from the archive.
#
# Everything stays in one RUN so the apt lists and the downloaded zip
# are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        b3sum \
        build-essential \
        ca-certificates \
        curl \
        file \
        gdal-bin \
        libgdal-dev \
        libgeos-dev \
        libproj-dev \
        libspatialindex-dev \
        uchardet \
        unzip \
    && rm -rf /var/lib/apt/lists/* \
    && curl -fsSL --retry 3 "${QSV_RELEASE}/${QSV_VERSION}/qsv-${QSV_VERSION}-x86_64-unknown-linux-musl.zip" -o /tmp/qsv.zip \
    && if [ -n "${QSV_SHA256}" ]; then \
           echo "${QSV_SHA256}  /tmp/qsv.zip" | sha256sum -c -; \
       fi \
    && unzip -j /tmp/qsv.zip 'qsvdp' -d /usr/local/bin/ \
    && chmod +x /usr/local/bin/qsvdp \
    && rm /tmp/qsv.zip
# Expose the GDAL headers to the C and C++ compilers so that any
# subprocess pip install rebuilding fiona / shapely / pyproj from sdist
# can find them next to ``gdal-config``.
#
# The GDAL Python binding is deliberately NOT pip-installed here:
#   * nothing in this codebase imports ``osgeo`` (verified with grep);
#   * fiona / shapely / pyproj link the GDAL C library directly via
#     ``gdal-config``;
#   * a GDAL pin made at image-build time would get silently re-resolved
#     by the runtime ``pip install -e .`` the compose worker command
#     does next.
ENV C_INCLUDE_PATH=/usr/include/gdal \
    CPLUS_INCLUDE_PATH=/usr/include/gdal
# Python dependencies for DP+. Only ``requirements.txt`` is copied into
# the image at build time; the DP+ source tree itself is bind-mounted
# when the container runs.
#
# Path note: the worker MUST mount the repo at the SAME absolute path
# the CKAN container uses (``/srv/app/src/datapusher-plus``), which is
# why that path is the working directory here. The
# ``datapusher_plus prefect-deploy`` command runs inside the CKAN
# container and bakes ``Path(dp_pkg.__file__).resolve().parent`` into
# the deployment as the source-pull location. With a different mount
# path on the worker, the ``set_working_directory`` step crashes with
# ``FileNotFoundError: '/srv/app/src/datapusher-plus/ckanext/...'`` and
# every flow run hangs in ``Pending`` indefinitely.
WORKDIR /srv/app/src/datapusher-plus
# Destination is relative to the WORKDIR set on the previous line.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# Default fallback command: start a Prefect worker on the
# ``datapusher-plus`` pool. The worker's actual command is provided by
# docker-compose, which overrides this. Exec (JSON-array) form, so no
# shell is involved.
CMD ["prefect", "worker", "start", "--pool", "datapusher-plus"]