Skip to content

Commit 9cd0e70

Browse files
authored
fix: reenable arm64 builds for docker (#3045)
### Summary Closes #3034 and reenables ARM64 in the docker build and publish job. ARM64 was taken out in #3039 because we've only built `libreoffice` for AMD64 and not for ARM64. If Chainguard publishes an `apk` for `libreoffice`, we can support a Chainguard image for both architectures. The smoke test now differs between the two architectures to reflect differences in their directory structures. ### Testing Build and publish ran successfully for ARM64 (job [here](https://github.com/Unstructured-IO/unstructured/actions/runs/9129712470/job/25104907497)) and AMD64 (job [here](https://github.com/Unstructured-IO/unstructured/actions/runs/9129712470/job/25104907826)).
1 parent 1c8b2b2 commit 9cd0e70

File tree

5 files changed

+63
-22
lines changed

5 files changed

+63
-22
lines changed

Diff for: .github/workflows/docker-publish.yml

+11-17
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,7 @@ jobs:
2424
build-images:
2525
strategy:
2626
matrix:
27-
# NOTE(robinson) - temporarily disabling arm since the libreoffice packages only
28-
# works on amd right now
29-
docker-platform: ["linux/amd64"]
30-
# docker-platform: ["linux/arm64", "linux/amd64"]
27+
docker-platform: ["linux/arm64", "linux/amd64"]
3128
runs-on: ubuntu-latest-m
3229
needs: set-short-sha
3330
env:
@@ -53,6 +50,7 @@ jobs:
5350
make docker-dl-packages
5451
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
5552
DOCKER_BUILDKIT=1 docker buildx build --platform=$ARCH --load \
53+
-f Dockerfile-$ARCH \
5654
--build-arg PIP_VERSION=$PIP_VERSION \
5755
--build-arg BUILDKIT_INLINE_CACHE=1 \
5856
--progress plain \
@@ -72,8 +70,7 @@ jobs:
7270
DOCKER_PLATFORM="${{ matrix.docker-platform }}" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" \
7371
make docker-test CI=true TEST_FILE=test_unstructured/partition/test_text.py
7472
fi
75-
# NOTE(robinson) - disabling smoke because there's no notebook user anymore
76-
# DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test
73+
DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test
7774
- name: Push images
7875
run: |
7976
# write to the build repository to cache for the publish-images job
@@ -97,25 +94,22 @@ jobs:
9794
- name: Pull AMD image
9895
run: |
9996
docker pull $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA
100-
# NOTE(robinson) - put this back in when we reenable ARM
101-
# - name: Pull ARM image
102-
# run: |
103-
# docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
97+
- name: Pull ARM image
98+
run: |
99+
docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
104100
- name: Push latest build tags for AMD and ARM
105101
run: |
106102
# these are used to construct the final manifest but also cache-from in subsequent runs
107103
docker tag $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
108104
docker push $DOCKER_BUILD_REPOSITORY:amd64
109-
# NOTE(robinson) - update this when we reenable ARM
110-
# docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
111-
# docker push $DOCKER_BUILD_REPOSITORY:arm64
105+
docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
106+
docker push $DOCKER_BUILD_REPOSITORY:arm64
112107
- name: Push multiarch manifest
113108
run: |
114-
# NOTE(robinson) - update this when we reenable ARM
115-
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64
109+
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
116110
docker manifest push $DOCKER_REPOSITORY:latest
117-
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
111+
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
118112
docker manifest push $DOCKER_REPOSITORY:$SHORT_SHA
119113
VERSION=$(grep -Po '(?<=__version__ = ")[^"]*' unstructured/__version__.py)
120-
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64
114+
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
121115
docker manifest push $DOCKER_REPOSITORY:$VERSION

Diff for: Dockerfile renamed to Dockerfile-amd64

File renamed without changes.

Diff for: Dockerfile-arm64

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# syntax=docker/dockerfile:1
2+
FROM quay.io/unstructured-io/base-images:rocky9.2-9@sha256:73d8492452f086144d4b92b7931aa04719f085c74d16cae81e8826ef873729c9 AS base
3+
4+
# NOTE(crag): NB_USER ARG for mybinder.org compat:
5+
# https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html
6+
ARG NB_USER=notebook-user
7+
ARG NB_UID=1000
8+
ARG PIP_VERSION
9+
10+
# Set up environment; HOME is also the WORKDIR, and PATH gains the user's ~/.local/bin
11+
ENV HOME=/home/${NB_USER}
12+
ENV PYTHONPATH="${PYTHONPATH}:${HOME}"
13+
ENV PATH="${HOME}/.local/bin:${PATH}"
14+
# Create the non-root group and user (same numeric id for both) that the image runs as
15+
RUN groupadd --gid ${NB_UID} ${NB_USER}
16+
RUN useradd --uid ${NB_UID} --gid ${NB_UID} ${NB_USER}
17+
WORKDIR ${HOME}
18+
19+
FROM base AS deps
20+
# Copy and install Unstructured
21+
COPY requirements requirements
22+
# "Development Tools" is installed only for the pip builds and removed again in the same layer
23+
RUN python3.10 -m pip install pip==${PIP_VERSION} && \
24+
dnf -y groupinstall "Development Tools" && \
25+
find requirements/ -type f -name "*.txt" -exec python3.10 -m pip install --no-cache-dir -r '{}' ';' && \
26+
dnf -y groupremove "Development Tools" && \
27+
dnf clean all
28+
# Fetch the NLTK data sets (punkt, averaged_perceptron_tagger) at build time
29+
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
30+
python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')"
31+
32+
FROM deps AS code
33+
# Drop root for the remaining build steps and for the running container
34+
USER ${NB_USER}
35+
36+
COPY example-docs example-docs
37+
COPY unstructured unstructured
38+
# Call unstructured's model_init.initialize() during the build (presumably warms model caches - confirm)
39+
RUN python3.10 -c "from unstructured.partition.model_init import initialize; initialize()"
40+
41+
CMD ["/bin/bash"]

Diff for: scripts/docker-build.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ DOCKER_REPOSITORY="${DOCKER_REPOSITORY:-quay.io/unstructured-io/unstructured}"
55
PIP_VERSION="${PIP_VERSION:-23.1.2}"
66
DOCKER_IMAGE="${DOCKER_IMAGE:-unstructured:dev}"
77

8-
DOCKER_BUILD_CMD=(docker buildx build --load -f Dockerfile
8+
DOCKER_BUILD_CMD=(docker buildx build --load -f Dockerfile-amd64
99
--build-arg PIP_VERSION="$PIP_VERSION"
1010
--build-arg BUILDKIT_INLINE_CACHE=1
1111
--progress plain

Diff for: scripts/docker-smoke-test.sh

+10-4
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,16 @@ trap stop_container EXIT
3838
await_container
3939

4040
# Run the tests
41-
docker cp test_unstructured_ingest $CONTAINER_NAME:/app
42-
docker cp requirements/ingest $CONTAINER_NAME:/app/requirements/ingest
43-
docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R nonroot:nonroot /app/test_unstructured_ingest"
44-
docker exec "$CONTAINER_NAME" /bin/bash -c "/app/test_unstructured_ingest/src/wikipedia.sh"
41+
if [[ "$DOCKER_IMAGE" == *"arm64"* ]]; then
42+
docker cp test_unstructured_ingest $CONTAINER_NAME:/home/notebook-user
43+
docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R 1000:1000 /home/notebook-user/test_unstructured_ingest"
44+
docker exec "$CONTAINER_NAME" /bin/bash -c "/home/notebook-user/test_unstructured_ingest/src/wikipedia.sh"
45+
else
46+
docker cp test_unstructured_ingest $CONTAINER_NAME:/app
47+
docker cp requirements/ingest $CONTAINER_NAME:/app/requirements/ingest
48+
docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R nonroot:nonroot /app/test_unstructured_ingest"
49+
docker exec "$CONTAINER_NAME" /bin/bash -c "/app/test_unstructured_ingest/src/wikipedia.sh"
50+
fi
4551

4652
result=$?
4753
exit $result

0 commit comments

Comments
 (0)