-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathDockerfile
More file actions
152 lines (123 loc) · 6.15 KB
/
Dockerfile
File metadata and controls
152 lines (123 loc) · 6.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Ubuntu "noble" base image.
FROM ubuntu:noble

# Proxy settings may be supplied at build time in either spelling, e.g.
#   docker build --build-arg http_proxy=http://proxy:3128 .
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG NO_PROXY
ARG http_proxy
ARG https_proxy
ARG no_proxy

# Export both spellings, because tools disagree on which one they read.
# Each variable prefers its own build arg and falls back to the other spelling.
# Assigning the lowercase variables unconditionally from the uppercase args
# would overwrite a lowercase-only --build-arg with an empty string.
ENV HTTP_PROXY=${HTTP_PROXY:-$http_proxy}
ENV HTTPS_PROXY=${HTTPS_PROXY:-$https_proxy}
ENV NO_PROXY=${NO_PROXY:-$no_proxy}
ENV http_proxy=${http_proxy:-$HTTP_PROXY}
ENV https_proxy=${https_proxy:-$HTTPS_PROXY}
ENV no_proxy=${no_proxy:-$NO_PROXY}
# Keep apt/debconf from prompting during the package installation steps.
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_PRIORITY=critical
# Settings read by the NVIDIA container runtime: expose every GPU with the
# compute, utility and display driver capabilities.
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,display
# Python treats ANY non-empty PYTHONDONTWRITEBYTECODE value (including "0") as
# "do not write .pyc files on import", so "=0" never enabled bytecode writing.
# Spell it as 1 so the setting reads as what it actually does. The application
# is byte-compiled explicitly by the compileall step later in this file.
ENV PYTHONDONTWRITEBYTECODE=1
# Turns off buffering for easier container logging
ENV PYTHONUNBUFFERED=1
# ENV SETUPTOOLS_USE_DISTUTILS=stdlib
# Package installation and system setup below run as root.
USER root
# AppStream/dep11 metadata downloads are flaky while mirrors sync, so drop the
# AppStream apt hook and write an apt config that retries failed downloads five
# times and skips translation files.
RUN rm -f /etc/apt/apt.conf.d/50appstream \
 && printf 'Acquire::Retries "5";\nAcquire::Languages "none";\n' \
      > /etc/apt/apt.conf.d/99apt-tuning
# Refresh the package index, bring the base image up to date and install the
# tooling that provides add-apt-repository.
RUN apt-get -o Acquire::Retries=5 update -yq \
 && apt-get upgrade -y \
 && apt-get install -y software-properties-common

# Enable the multiverse and universe components plus the graphics-drivers PPA.
# -n skips the per-repository index refresh; one explicit update follows.
RUN apt-add-repository -y -n multiverse \
 && apt-add-repository -y -n universe \
 && add-apt-repository -y -n ppa:graphics-drivers/ppa \
 && apt-get -o Acquire::Retries=5 update -yq \
 && apt-get upgrade -y
# Python 3.12 toolchain: interpreter, headers, venv support, pip and the
# system UNO bindings (python3-uno).
RUN apt-get install -y --no-install-recommends \
      libpython3.12-dev \
      python3-all-dev \
      python3-dev \
      python3-pip \
      python3-uno \
      python3.12 \
      python3.12-dev \
      python3.12-venv

# Full distribution upgrade (keeping existing config files, taking defaults
# otherwise), followed by download, crypto and compiler tooling.
# One package per line, sorted, each listed once.
RUN apt-get dist-upgrade -y -f -u --no-install-recommends \
      -o Dpkg::Options::="--force-confold" \
      -o Dpkg::Options::="--force-confdef" \
 && apt-get install -y --no-install-recommends \
      apt-transport-https \
      build-essential \
      ca-certificates \
      curl \
      dirmngr \
      fonts-dejavu \
      g++ \
      gcc \
      gfortran \
      gnupg \
      gnupg-agent \
      libssl-dev \
      wget
##### fonts, imaging libraries and Tesseract
# Pre-accept the Microsoft core fonts EULA so the installer does not prompt.
RUN echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | debconf-set-selections
# One package per line, sorted, each listed once.
RUN apt-get install -y --no-install-recommends \
      fontconfig \
      fonts-croscore \
      fonts-crosextra-caladea \
      fonts-crosextra-carlito \
      fonts-dejavu \
      fonts-deva \
      fonts-ebgaramond \
      fonts-font-awesome \
      fonts-gfs-didot \
      fonts-gfs-didot-classic \
      fonts-hack \
      fonts-ibm-plex \
      fonts-inconsolata \
      fonts-junicode \
      fonts-liberation \
      fonts-liberation2 \
      fonts-mononoki \
      fonts-noto \
      fonts-noto-cjk \
      fonts-noto-core \
      fonts-noto-extra \
      fonts-open-sans \
      fonts-takao-gothic \
      fonts-urw-base35 \
      fonts-vlgothic \
      ghostscript \
      ghostscript-x \
      gsfonts \
      gsfonts-other \
      gsfonts-x11 \
      gstreamer1.0-libav \
      imagemagick \
      libcairo2-dev \
      libimage-exiftool-perl \
      liblept5 \
      libleptonica-dev \
      libpcre3 \
      libpcre3-dev \
      libsm6 \
      libtcnative-1 \
      libtesseract-dev \
      libtesseract5 \
      libvips \
      libvips-dev \
      libvips-tools \
      libxext6 \
      mesa-opencl-icd \
      pocl-opencl-icd \
      tesseract-ocr \
      tesseract-ocr-all \
      ttf-mscorefonts-installer \
      xfonts-terminus
# Tesseract language and script packages (one per line, sorted, each listed once).
RUN apt-get install -y --no-install-recommends --fix-missing \
      tesseract-ocr-ces \
      tesseract-ocr-dan \
      tesseract-ocr-deu \
      tesseract-ocr-eng \
      tesseract-ocr-enm \
      tesseract-ocr-fra \
      tesseract-ocr-frk \
      tesseract-ocr-ita \
      tesseract-ocr-lat \
      tesseract-ocr-nld \
      tesseract-ocr-nor \
      tesseract-ocr-osd \
      tesseract-ocr-ron \
      tesseract-ocr-script-grek \
      tesseract-ocr-script-latn \
      tesseract-ocr-slk \
      tesseract-ocr-spa \
      tesseract-ocr-swe
# Development headers and libraries required by Pillow.
RUN apt-get install -y --no-install-recommends \
      libfreetype6-dev \
      libfribidi-dev \
      libgl1 \
      libglib2.0-dev \
      libharfbuzz-dev \
      libjpeg8-dev \
      liblcms2-dev \
      libopenjp2-7-dev \
      libtiff5-dev \
      libwebp-dev \
      libxcb1-dev \
      tcl8.6-dev \
      tk8.6-dev \
      zlib1g-dev

# Poppler command-line tools needed by the Python poppler dependency.
RUN apt-get install -y --no-install-recommends \
      poppler-utils

# LibreOffice with its Java and Python scripting support, plus a default JRE.
RUN apt-get install -y --no-install-recommends \
      default-jre \
      libreoffice \
      libreoffice-java-common \
      libreoffice-script-provider-python

# Rebuild the fontconfig cache so the fonts installed above are picked up.
RUN fc-cache -f -v
# there is a bug in the blinker package that causes issues with uwsgi
# (removing it also removes software-properties-common)
#
# Removal and cleanup run as ONE layer, using apt-get (the stable scripting
# interface) rather than apt. Files written by earlier layers still exist in
# those layers; this only cleans the final filesystem.
RUN apt-get remove -y python3-blinker \
 && apt-get autoremove --purge -y \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# other openCL packages
# beignet-opencl-icd
# Copy the application into /ocr_service and make it the working directory.
# WORKDIR creates the directory, so no separate "RUN mkdir" layer is needed.
WORKDIR /ocr_service
COPY ./ /ocr_service/
# --- System-wide unoserver (runs under the system Python, not the venv) ---
# The UNO Python bindings come from the distro package python3-uno and belong to
# the system interpreter; per the original author they do not work inside a venv.
# unoserver is therefore installed globally with pip3, using the exact version
# pinned in requirements.txt when there is one, or the latest release otherwise.
# This step must stay BEFORE the venv is put on PATH so pip3 resolves to the
# system pip. --break-system-packages lets pip write into the system
# site-packages.
# NOTE(review): the earlier comment also said unoserver was not available via
# pip for python3.12 (as of 2025-08), yet this step installs it with pip3 -
# confirm which statement is current.
RUN pin="$(awk -F'==' '/^unoserver==/ {print $2; exit}' requirements.txt || true)" && \
    if [ -z "$pin" ]; then \
      echo "No exact pin found for unoserver in requirements.txt; installing latest system unoserver"; \
      pip3 install --no-cache-dir --break-system-packages unoserver; \
    else \
      echo "Installing system unoserver==$pin"; \
      pip3 install --no-cache-dir --break-system-packages "unoserver==$pin"; \
    fi
# --- end of system unoserver install ---
# Python dependencies live in a dedicated virtual environment (single-stage build).
# Putting its bin/ first on PATH makes "python" and "pip" resolve to the venv
# for every later instruction and at runtime.
ENV VIRTUAL_ENV=/opt/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Create the venv with the system python3.12, then install requirements.txt
# into it. Nothing runs between the two steps: a bare interpreter invocation
# there would only read an empty stdin during the build and do nothing.
RUN python3.12 -m venv "$VIRTUAL_ENV" \
 && "$VIRTUAL_ENV/bin/pip" install --no-cache-dir -r ./requirements.txt
# Byte-compile the application with the venv interpreter at build time.
RUN "$VIRTUAL_ENV/bin/python" -m compileall /ocr_service
# Run as non-root by default for Kubernetes restricted policies.
# UID/GID can be overridden with --build-arg; both default to 10001.
ARG OCR_SERVICE_UID=10001
ARG OCR_SERVICE_GID=10001
# Create the service account and group, then hand it ownership of the only
# directories given to it here: tmp/, log/ and its home directory.
RUN groupadd --system --gid "$OCR_SERVICE_GID" ocrsvc \
 && useradd --system \
      --uid "$OCR_SERVICE_UID" \
      --gid "$OCR_SERVICE_GID" \
      --create-home --home-dir /home/ocrsvc \
      --shell /usr/sbin/nologin \
      ocrsvc \
 && mkdir -p /ocr_service/tmp /ocr_service/log \
 && chown -R ocrsvc:ocrsvc /ocr_service/tmp /ocr_service/log /home/ocrsvc
ENV HOME=/home/ocrsvc
# Use the numeric UID:GID rather than the account name. Kubernetes can only
# verify runAsNonRoot against a numeric image user; with a named user the pod
# is rejected unless it sets runAsUser itself.
USER ${OCR_SERVICE_UID}:${OCR_SERVICE_GID}
# Start the service; the script path is resolved relative to WORKDIR (/ocr_service).
CMD ["/bin/bash", "start_service_production.sh"]