Skip to content

Commit b5a0eab

Browse files
committed
Remove redundant test-python-unit job from build_container_image
The test-python-unit job was running full integration tests inside Docker containers, but these tests are already covered by lint_and_unit.yaml, which:

- Runs on both PRs and pushes to main
- Tests on both amd64 and arm64 architectures
- Uses the same services (Postgres, OpenSearch, MinIO, Apache Tika)
- Runs the same instrumented_test_runner.py

This caused duplicate test execution on every push to main:

1. lint_and_unit.unit (tests on host)
2. build_container_image.test-python-unit (tests in container)

Changes:

- Removed test-python-unit job entirely (275 lines)
- Updated build-data-processing dependency: removed test-python-unit
- test-multi-arch still validates containers with smoke tests

Benefits:

- Eliminates duplicate test runs on push to main
- Saves ~10-15 minutes per push to main
- Clearer workflow purpose: build_container_image focuses on building and publishing images, not comprehensive testing
- lint_and_unit handles all test coverage
1 parent 50311ac commit b5a0eab

1 file changed

Lines changed: 1 addition & 275 deletions

File tree

.github/workflows/build_container_image.yaml

Lines changed: 1 addition & 275 deletions
Original file line numberDiff line numberDiff line change
@@ -76,280 +76,6 @@ jobs:
7676
docker system prune -f
7777
docker image prune -af
7878
79-
test-python-unit:
80-
name: Run Python unit tests
81-
strategy:
82-
fail-fast: false
83-
matrix:
84-
include:
85-
- platform: linux/amd64
86-
runner: ubuntu-latest
87-
arch: amd64
88-
- platform: linux/arm64
89-
runner: ubuntu-24.04-arm
90-
arch: arm64
91-
runs-on: ${{ matrix.runner }}
92-
timeout-minutes: 60
93-
services:
94-
postgres:
95-
image: postgres:11
96-
env:
97-
POSTGRES_PASSWORD: queridodiario
98-
POSTGRES_USER: queridodiario
99-
POSTGRES_DB: queridodiariodb
100-
options: >-
101-
--health-cmd pg_isready
102-
--health-interval 10s
103-
--health-timeout 5s
104-
--health-retries 5
105-
ports:
106-
- 5432:5432
107-
108-
opensearch:
109-
image: opensearchproject/opensearch:2.9.0
110-
env:
111-
discovery.type: single-node
112-
plugins.security.ssl.http.enabled: false
113-
plugins.security.disabled: true
114-
OPENSEARCH_INITIAL_ADMIN_PASSWORD: admin
115-
options: >-
116-
--health-cmd "curl -s http://localhost:9200 >/dev/null || exit 1"
117-
--health-interval 15s
118-
--health-timeout 10s
119-
--health-retries 20
120-
ports:
121-
- 9200:9200
122-
123-
apache-tika:
124-
image: ghcr.io/${{ github.repository }}/apache-tika:latest
125-
ports:
126-
- 9998:9998
127-
128-
steps:
129-
- name: Checkout code
130-
uses: actions/checkout@v4
131-
132-
- name: Start MinIO
133-
run: |
134-
docker run -d \
135-
--name minio \
136-
--network host \
137-
--health-cmd "curl -f http://localhost:9000/minio/health/live || exit 1" \
138-
--health-interval 10s \
139-
--health-timeout 5s \
140-
--health-retries 5 \
141-
-e MINIO_ROOT_USER=minio-access-key \
142-
-e MINIO_ROOT_PASSWORD=minio-secret-key \
143-
quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z \
144-
server /data --console-address :9001
145-
146-
# Wait for MinIO to be ready
147-
echo "Waiting for MinIO to be ready..."
148-
timeout 60 bash -c 'until curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; do sleep 2; done'
149-
echo "MinIO is ready"
150-
151-
- name: Create MinIO bucket
152-
run: |
153-
# Wait for MinIO to be ready
154-
for i in {1..30}; do
155-
if curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; then
156-
echo "MinIO is ready"
157-
break
158-
fi
159-
echo "Waiting for MinIO... ($i/30)"
160-
sleep 2
161-
done
162-
163-
# Detect architecture and download appropriate MinIO client
164-
ARCH=$(uname -m)
165-
if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
166-
MC_URL="https://dl.min.io/client/mc/release/linux-arm64/mc"
167-
else
168-
MC_URL="https://dl.min.io/client/mc/release/linux-amd64/mc"
169-
fi
170-
171-
echo "Downloading MinIO client for architecture: $ARCH"
172-
curl -sLo /tmp/mc "$MC_URL"
173-
chmod +x /tmp/mc
174-
175-
# Configure MinIO client
176-
/tmp/mc alias set minio http://localhost:9000 minio-access-key minio-secret-key
177-
178-
# Create bucket if it doesn't exist
179-
/tmp/mc mb minio/queridodiariobucket --ignore-existing || true
180-
181-
echo "MinIO bucket created successfully"
182-
183-
- name: Create OpenSearch index
184-
timeout-minutes: 2
185-
run: |
186-
echo "=== Creating OpenSearch index ==="
187-
echo "OpenSearch host: http://localhost:9200"
188-
echo "Testing connectivity..."
189-
curl -v http://localhost:9200/_cluster/health || echo "OpenSearch not responding"
190-
echo "Running init script..."
191-
chmod +x init-scripts/opensearch/create-opensearch-index.sh
192-
OPENSEARCH_HOST=http://localhost:9200 \
193-
OPENSEARCH_USER=admin \
194-
OPENSEARCH_PASSWORD=admin \
195-
INDEX_NAME=querido-diario \
196-
./init-scripts/opensearch/create-opensearch-index.sh
197-
echo "=== OpenSearch index creation completed ==="
198-
199-
- name: Set up Docker Buildx
200-
uses: docker/setup-buildx-action@v3
201-
with:
202-
driver-opts: |
203-
image=moby/buildkit:v0.12.5
204-
205-
- name: Build test image
206-
uses: docker/build-push-action@v5
207-
with:
208-
context: .
209-
file: ./Dockerfile
210-
platforms: ${{ matrix.platform }}
211-
load: true
212-
cache-from: type=gha,scope=unittest-${{ matrix.arch }}
213-
cache-to: type=gha,mode=max,scope=unittest-${{ matrix.arch }}
214-
tags: test-data-processing:unittest-${{ matrix.arch }}
215-
216-
- name: Build Apache Tika test image
217-
uses: docker/build-push-action@v5
218-
with:
219-
context: .
220-
file: ./Dockerfile_apache_tika
221-
platforms: ${{ matrix.platform }}
222-
load: true
223-
cache-from: type=gha,scope=tika-unittest-${{ matrix.arch }}
224-
cache-to: type=gha,mode=max,scope=tika-unittest-${{ matrix.arch }}
225-
tags: test-apache-tika:unittest-${{ matrix.arch }}
226-
227-
- name: Start Apache Tika server
228-
run: |
229-
docker run -d -p 9998:9998 --name tika-server-${{ matrix.arch }} test-apache-tika:unittest-${{ matrix.arch }}
230-
sleep 15
231-
232-
# Verify Tika is running
233-
for i in {1..6}; do
234-
if curl -f http://localhost:9998/version > /dev/null 2>&1; then
235-
echo "✅ Apache Tika server is ready on ${{ matrix.platform }}"
236-
break
237-
fi
238-
echo "Waiting for Tika server on ${{ matrix.platform }}... ($i/6)"
239-
sleep 5
240-
done
241-
242-
- name: Run Python unit tests
243-
timeout-minutes: 5
244-
env:
245-
PYTHONPATH: /mnt/code
246-
PYTHONUNBUFFERED: 1
247-
POSTGRES_PASSWORD: queridodiario
248-
POSTGRES_USER: queridodiario
249-
POSTGRES_DB: queridodiariodb
250-
POSTGRES_HOST: localhost
251-
POSTGRES_PORT: 5432
252-
STORAGE_REGION: us-east-1
253-
STORAGE_ENDPOINT: http://localhost:9000
254-
STORAGE_ACCESS_KEY: minio-access-key
255-
STORAGE_ACCESS_SECRET: minio-secret-key
256-
STORAGE_BUCKET: queridodiariobucket
257-
OPENSEARCH_HOST: http://localhost:9200
258-
OPENSEARCH_INDEX: querido-diario
259-
OPENSEARCH_USER: admin
260-
OPENSEARCH_PASSWORD: admin
261-
APACHE_TIKA_SERVER: http://localhost:9998
262-
DEBUG: 1
263-
HF_HUB_OFFLINE: 1
264-
TRANSFORMERS_OFFLINE: 1
265-
SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
266-
run: |
267-
docker run --rm \
268-
--network host \
269-
-e PYTHONPATH \
270-
-e PYTHONUNBUFFERED \
271-
-e POSTGRES_PASSWORD \
272-
-e POSTGRES_USER \
273-
-e POSTGRES_DB \
274-
-e POSTGRES_HOST \
275-
-e POSTGRES_PORT \
276-
-e STORAGE_REGION \
277-
-e STORAGE_ENDPOINT \
278-
-e STORAGE_ACCESS_KEY \
279-
-e STORAGE_ACCESS_SECRET \
280-
-e STORAGE_BUCKET \
281-
-e OPENSEARCH_HOST \
282-
-e OPENSEARCH_INDEX \
283-
-e OPENSEARCH_USER \
284-
-e OPENSEARCH_PASSWORD \
285-
-e APACHE_TIKA_SERVER \
286-
-e DEBUG \
287-
-e HF_HUB_OFFLINE \
288-
-e TRANSFORMERS_OFFLINE \
289-
-e SENTENCE_TRANSFORMERS_HOME \
290-
test-data-processing:unittest-${{ matrix.arch }} \
291-
python -m unittest discover -s tests -p "*.py" -v
292-
293-
- name: Run coverage report
294-
env:
295-
PYTHONPATH: /mnt/code
296-
PYTHONUNBUFFERED: 1
297-
POSTGRES_PASSWORD: queridodiario
298-
POSTGRES_USER: queridodiario
299-
POSTGRES_DB: queridodiariodb
300-
POSTGRES_HOST: localhost
301-
POSTGRES_PORT: 5432
302-
STORAGE_REGION: us-east-1
303-
STORAGE_ENDPOINT: http://localhost:9000
304-
STORAGE_ACCESS_KEY: minio-access-key
305-
STORAGE_ACCESS_SECRET: minio-secret-key
306-
STORAGE_BUCKET: queridodiariobucket
307-
OPENSEARCH_HOST: http://localhost:9200
308-
OPENSEARCH_INDEX: querido-diario
309-
OPENSEARCH_USER: admin
310-
OPENSEARCH_PASSWORD: admin
311-
APACHE_TIKA_SERVER: http://localhost:9998
312-
DEBUG: 1
313-
HF_HUB_OFFLINE: 1
314-
TRANSFORMERS_OFFLINE: 1
315-
SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
316-
run: |
317-
docker run --rm \
318-
--network host \
319-
-e PYTHONPATH \
320-
-e PYTHONUNBUFFERED \
321-
-e POSTGRES_PASSWORD \
322-
-e POSTGRES_USER \
323-
-e POSTGRES_DB \
324-
-e POSTGRES_HOST \
325-
-e POSTGRES_PORT \
326-
-e STORAGE_REGION \
327-
-e STORAGE_ENDPOINT \
328-
-e STORAGE_ACCESS_KEY \
329-
-e STORAGE_ACCESS_SECRET \
330-
-e STORAGE_BUCKET \
331-
-e OPENSEARCH_HOST \
332-
-e OPENSEARCH_INDEX \
333-
-e OPENSEARCH_USER \
334-
-e OPENSEARCH_PASSWORD \
335-
-e APACHE_TIKA_SERVER \
336-
-e DEBUG \
337-
-e HF_HUB_OFFLINE \
338-
-e TRANSFORMERS_OFFLINE \
339-
-e SENTENCE_TRANSFORMERS_HOME \
340-
test-data-processing:unittest-${{ matrix.arch }} \
341-
bash -c "coverage run -m unittest discover -s tests -p '*.py' && coverage report -m"
342-
343-
- name: Cleanup
344-
if: always()
345-
run: |
346-
docker stop tika-server-${{ matrix.arch }} || true
347-
docker rm tika-server-${{ matrix.arch }} || true
348-
docker stop minio || true
349-
docker rm minio || true
350-
docker system prune -f
351-
docker image prune -af
352-
35379
build-data-processing:
35480
name: Build data processing container image
35581
strategy:
@@ -363,7 +89,7 @@ jobs:
36389
runner: ubuntu-24.04-arm
36490
arch: arm64
36591
runs-on: ${{ matrix.runner }}
366-
needs: [test-multi-arch, test-python-unit]
92+
needs: [test-multi-arch]
36793
timeout-minutes: 90
36894
steps:
36995
- name: Checkout code

0 commit comments

Comments
 (0)