Skip to content

Commit 59785e4

Browse files
chore: install all extras in Dockerfile (#419)
* Adds step to install all extras * Adds smoke test of wikipedia ingest to validate in CI
1 parent 32c79ca commit 59785e4

File tree

4 files changed

+58
-0
lines changed

4 files changed

+58
-0
lines changed

Diff for: .github/workflows/docker-publish.yml

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ jobs:
4747
- name: Test AMD image
4848
run: |
4949
DOCKER_PLATFORM="linux/amd64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA" make docker-test
50+
IMAGE_NAME=$DOCKER_BUILD_REPOSITORY:amd-$SHORT_SHA make docker-smoke-test
5051
- name: Push AMD image
5152
run: |
5253
# write to the build repository to cache for the publish-images job
@@ -80,6 +81,7 @@ jobs:
8081
run: |
8182
# only run a subset of tests on ARM, since they take a long time with emulation
8283
DOCKER_PLATFORM="linux/arm64" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA" make docker-test TEST_NAME=partition/test_text.py
84+
IMAGE_NAME=$DOCKER_BUILD_REPOSITORY:arm-$SHORT_SHA make docker-smoke-test
8385
- name: Push ARM image
8486
run: |
8587
# write to the build repository to cache for the publish-images job

Diff for: Dockerfile

+7
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
7272
pip install --no-cache -r requirements/test.txt && \
7373
pip install --no-cache -r requirements/huggingface.txt && \
7474
pip install --no-cache -r requirements/dev.txt && \
75+
pip install --no-cache -r requirements/ingest-azure.txt && \
76+
pip install --no-cache -r requirements/ingest-github.txt && \
77+
pip install --no-cache -r requirements/ingest-gitlab.txt && \
78+
pip install --no-cache -r requirements/ingest-google-drive.txt && \
79+
pip install --no-cache -r requirements/ingest-reddit.txt && \
80+
pip install --no-cache -r requirements/ingest-s3.txt && \
81+
pip install --no-cache -r requirements/ingest-wikipedia.txt && \
7582
pip install --no-cache -r requirements/local-inference.txt && \
7683
pip install --no-cache "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
7784

Diff for: Makefile

+4
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,7 @@ docker-test:
203203
-v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured \
204204
$(DOCKER_IMAGE) \
205205
bash -c "pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"
206+
207+
# Smoke-test a built image by running scripts/docker-smoke-test.sh. The script
# reads the image to start from the IMAGE_NAME environment variable.
.PHONY: docker-smoke-test
208+
docker-smoke-test:
209+
./scripts/docker-smoke-test.sh

Diff for: scripts/docker-smoke-test.sh

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/bin/bash

# Smoke test for the containerized repository: boots the image and runs the
# ingest tests inside it.

# shellcheck disable=SC2317 # Shellcheck complains that trap functions are unreachable...

# Abort on errors, on unset variables and on failures inside pipelines; trace
# every command so the CI log shows what was executed.
set -o errexit -o nounset -o xtrace -o pipefail

CONTAINER_NAME=unstructured-smoke-test
# The image under test can be overridden through the environment.
IMAGE_NAME="${IMAGE_NAME:-unstructured:latest}"

# Run from the repository root (the parent of this script's directory) so the
# relative paths used below resolve no matter where the script is invoked from.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "${SCRIPT_DIR}/.." || exit 1
15+
16+
# Launch the image under test as a detached container with a TTY attached.
# The container is removed automatically once it stops (--rm).
# Reads CONTAINER_NAME and IMAGE_NAME from the enclosing script.
start_container() {
  printf '%s\n' "Starting container $CONTAINER_NAME"
  docker run \
    --detach \
    --tty \
    --rm \
    --name "$CONTAINER_NAME" \
    "$IMAGE_NAME"
}
20+
21+
# Block until the container named by CONTAINER_NAME reports the "running" state.
#
# Polls `docker inspect` once per second. Gives up and returns 1 after
# AWAIT_TIMEOUT_SECONDS (default: 60), so the caller cannot hang forever when
# the container exits, or is removed by --rm, before it ever reaches "running".
await_container() {
  local timeout="${AWAIT_TIMEOUT_SECONDS:-60}"
  local waited=0
  local status

  echo Waiting for container to start
  while true; do
    # `|| true`: inspect fails when the container does not exist (yet, or any
    # more); treat that as "not running" instead of tripping `set -e`.
    status="$(docker inspect -f '{{.State.Status}}' "$CONTAINER_NAME" || true)"
    if [ "$status" = "running" ]; then
      return 0
    fi
    if [ "$waited" -ge "$timeout" ]; then
      echo "Container $CONTAINER_NAME did not reach 'running' within ${timeout}s (last status: ${status:-unknown})" >&2
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done
}
27+
28+
# Stop the smoke-test container identified by CONTAINER_NAME.
stop_container() {
  printf '%s\n' "Stopping container $CONTAINER_NAME"
  docker container stop "$CONTAINER_NAME"
}
32+
33+
start_container

# Regardless of test result, stop the container
trap stop_container EXIT

await_container

# Run the tests: copy the ingest test directory into the container, then run
# the Wikipedia ingest script inside it.
docker cp test_unstructured_ingest "$CONTAINER_NAME":/home
# No explicit `exit $?` afterwards: under the script's `set -e` a failing
# `docker exec` already terminates the script with that status (after the EXIT
# trap has stopped the container), and a successful one leaves status 0.
docker exec "$CONTAINER_NAME" /bin/bash -c "/home/test_unstructured_ingest/test-ingest-wikipedia.sh"

0 commit comments

Comments
 (0)