Skip to content

Commit 4ac3213

Browse files
authored
Merge pull request #110 from scieloorg/markup-get-labels-docx-xml-pipeline
Corrige o fluxo Carregar DOCX → get_labels → XML SPS marcado
2 parents ce60876 + d794718 commit 4ac3213

10 files changed

Lines changed: 2631 additions & 1649 deletions

File tree

.cursor/rules/tests.mdc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
2+
alwaysApply: true
3+
---
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# Tag of the official python image used as the base of every stage; declared
# before the first FROM so it can be substituted into that FROM line.
ARG PYTHON_VERSION=3.12-bookworm
2+
3+
# Shared base stage; the build and run stages in this file both derive from it.
FROM python:${PYTHON_VERSION} AS python
4+
5+
# Build stage: used to produce wheels that the run stage copies out later.
FROM python AS python-build-stage
6+
7+
# Selects which requirements file (<BUILD_ENVIRONMENT>.txt) the wheel build reads.
ARG BUILD_ENVIRONMENT=local
8+
9+
# Install the compiler toolchain and development headers needed to build wheels.
# Everything is done in a single layer: `apt-get update` and `install` stay
# together so a stale cached package index is never reused, recommended packages
# are skipped for every package (including cmake and ninja-build), and the apt
# lists are removed in the same layer that downloaded them.
# Packages are listed one per line in alphabetical order to keep diffs small.
RUN apt-get update && apt-get install --no-install-recommends -y \
10+
build-essential \
11+
cmake \
12+
git \
13+
libomp-dev \
14+
libopenblas-dev \
15+
libpq-dev \
16+
ninja-build \
17+
software-properties-common \
18+
&& rm -rf /var/lib/apt/lists/*
19+
20+
21+
# Upgrade the packaging toolchain once, before any project files are copied, so
# this layer stays cached when the requirements change.
RUN python -m pip install --upgrade pip setuptools wheel
22+
23+
# Copies the contents of ./requirements into the stage's working directory, which
# is where the `-r ${BUILD_ENVIRONMENT}.txt` lookup below expects to find them.
COPY ./requirements .
24+
25+
# NOTE(review): presumably needed because a requirements entry refers to files
# from ./docx_layouts during the wheel build; confirm before removing.
COPY ./docx_layouts .
26+
27+
# Build a wheel for every requirement of the selected environment. pip was
# already upgraded above, so it is not upgraded a second time here.
28+
29+
RUN pip wheel --wheel-dir /usr/src/app/wheels \
30+
-r ${BUILD_ENVIRONMENT}.txt
31+
32+
# Run stage: the image that actually executes the application.
FROM python AS python-run-stage
33+
34+
# Exported below as BUILD_ENV so the running container can see it.
ARG BUILD_ENVIRONMENT=local
35+
# Directory used as WORKDIR and as the destination of the application source.
ARG APP_HOME=/app
36+
# When "true", the from-source llama-cpp-python fallback on non-ARM builds is
# given CMake arguments intended to switch off AVX/AVX2/FMA/F16C.
ARG DISABLE_AVX=true
37+
# Exact llama-cpp-python release installed by the llama-cpp-python step.
ARG LLAMA_VERSION=0.3.14
38+
39+
# key=value form; the space-separated `ENV key value` syntax is deprecated.
ENV PYTHONUNBUFFERED=1
40+
ENV PYTHONDONTWRITEBYTECODE=1
41+
ENV BUILD_ENV=${BUILD_ENVIRONMENT}
42+
43+
WORKDIR ${APP_HOME}
44+
45+
# Add extra Debian components to the apt sources, handling both layouts:
#   - classic /etc/apt/sources.list: "main" becomes "main contrib non-free"
#   - deb822 /etc/apt/sources.list.d/debian.sources: "Components: main" becomes
#     "Components: main contrib non-free non-free-firmware"
# Only the first file that exists is edited; if neither exists nothing changes.
# NOTE(review): presumably required so the next apt step can install
# ttf-mscorefonts-installer; confirm which package actually needs it.
RUN if [ -f /etc/apt/sources.list ]; then \
46+
sed -i 's/main/main contrib non-free/' /etc/apt/sources.list; \
47+
elif [ -f /etc/apt/sources.list.d/debian.sources ]; then \
48+
sed -i 's/Components: main/Components: main contrib non-free non-free-firmware/' /etc/apt/sources.list.d/debian.sources; \
49+
fi
50+
51+
# Runtime OS packages, installed and cleaned up in one layer:
#   - libpq-dev, gettext
#   - a JRE, LibreOffice and a set of fonts
#   - build-essential, cmake and ninja-build, which stay in the image because the
#     llama-cpp-python step later in this file may fall back to a source build
# NOTE(review): `apt-get purge --auto-remove` is given no package names here, so
# it presumably only removes packages apt already considers unneeded; confirm.
RUN apt-get update && apt-get install --no-install-recommends -y \
52+
libpq-dev \
53+
gettext \
54+
default-jre libreoffice libreoffice-java-common ttf-mscorefonts-installer fonts-liberation fonts-liberation2 fonts-crosextra-carlito fonts-crosextra-caladea fonts-dejavu fonts-noto \
55+
build-essential cmake ninja-build \
56+
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
57+
&& rm -rf /var/lib/apt/lists/*
58+
59+
# Bring in the wheels produced by the build stage.
COPY --from=python-build-stage /usr/src/app/wheels /wheels/
60+
61+
# Install every copied wheel except llama_cpp_python (handled by its own step
# later in this file). --no-index keeps pip offline, so only /wheels/ is used.
# NOTE(review): the `rm -rf /wheels/` runs in a later layer than the COPY above,
# so the wheel files still occupy space in the earlier layer of the image.
RUN pip install --no-cache-dir --no-index --find-links=/wheels/ $(find /wheels/ -name "*.whl" ! -name "llama_cpp_python*") && rm -rf /wheels/
62+
63+
# Target architecture of the build; falls back to `uname -m` when the builder
# leaves it empty.
ARG TARGETARCH
64+
# Install llama-cpp-python==${LLAMA_VERSION}.
#   arm64/aarch64: try a prebuilt CPU wheel from the project's extra index
#                  (binary only); if that fails, compile with generic armv8-a
#                  settings.
#   other arches:  try a normal install that prefers binaries; if that fails,
#                  compile from source. With DISABLE_AVX=true the source build is
#                  configured without AVX/AVX2/FMA/F16C.
# The AVX switches use the GGML_* option names, the same naming the ARM branch
# already uses. GGML_NATIVE=OFF is passed as well so the compiler does not tune
# for the build host's CPU. The older LLAMA_* spellings are kept so that pinning
# an older LLAMA_VERSION still gets the same intent; CMake only warns about
# options a given version does not define.
RUN set -eux; \
65+
ARCH="${TARGETARCH:-}"; \
66+
if [ -z "${ARCH}" ]; then ARCH="$(uname -m)"; fi; \
67+
if [ "${ARCH}" = "arm64" ] || [ "${ARCH}" = "aarch64" ]; then \
68+
pip install "llama-cpp-python==${LLAMA_VERSION}" \
69+
--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
70+
--only-binary=:all: \
71+
--no-cache-dir \
72+
|| { \
73+
export FORCE_CMAKE=1; \
74+
export CMAKE_ARGS="-DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=OFF -DGGML_CPU_ARM_ARCH=armv8-a -DGGML_LLAMAFILE=OFF -DGGML_CPU_REPACK=OFF"; \
75+
pip install "llama-cpp-python==${LLAMA_VERSION}" --force-reinstall --no-cache-dir; \
76+
}; \
77+
else \
78+
pip install "llama-cpp-python==${LLAMA_VERSION}" --prefer-binary --no-cache-dir \
79+
|| { \
80+
export FORCE_CMAKE=1; \
81+
if [ "${DISABLE_AVX}" = "true" ]; then \
82+
export CMAKE_ARGS="-DGGML_NATIVE=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_OPENMP=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF -DLLAMA_OPENMP=ON"; \
83+
fi; \
84+
pip install "llama-cpp-python==${LLAMA_VERSION}" --force-reinstall --no-cache-dir; \
85+
}; \
86+
fi
87+
88+
# Install the container helper scripts. For each script: copy it in, strip
# Windows carriage returns (so a CRLF checkout does not break the shebang line)
# and mark it executable. The sed and chmod run in one layer per script, so the
# image does not carry a second, chmod-only copy of every file.
COPY ./compose/production/django/entrypoint /entrypoint
89+
RUN sed -i 's/\r$//g' /entrypoint && chmod +x /entrypoint
90+
91+
92+
COPY ./compose/local/django/start /start
93+
RUN sed -i 's/\r$//g' /start && chmod +x /start
94+
95+
96+
COPY ./compose/local/django/celery/worker/start /start-celeryworker
97+
RUN sed -i 's/\r$//g' /start-celeryworker && chmod +x /start-celeryworker
98+
99+
100+
COPY ./compose/local/django/celery/beat/start /start-celerybeat
101+
RUN sed -i 's/\r$//g' /start-celerybeat && chmod +x /start-celerybeat
102+
103+
104+
COPY ./compose/local/django/celery/flower/start /start-flower
105+
RUN sed -i 's/\r$//g' /start-flower && chmod +x /start-flower
106+
107+
108+
# Copy the whole build context into the application directory. This comes last
# so that source changes do not invalidate the dependency layers above.
COPY . ${APP_HOME}
109+
110+
# Exec-form entrypoint: runs the /entrypoint script installed earlier in this stage.
# NOTE(review): no USER instruction is visible in this stage, so the container
# presumably runs as root; confirm that is intended for this image.
ENTRYPOINT ["/entrypoint"]

0 commit comments

Comments
 (0)