@@ -76,280 +76,6 @@ jobs:
7676 docker system prune -f
7777 docker image prune -af
7878
# Runs the Python unit-test suite (with coverage) inside the freshly built
# project image, once per architecture in the matrix. Postgres and OpenSearch
# run as service containers; MinIO and Apache Tika are started by the steps
# below so that the Tika image under test is the one built from this checkout.
test-python-unit:
  name: Run Python unit tests
  strategy:
    fail-fast: false
    matrix:
      include:
        - platform: linux/amd64
          runner: ubuntu-latest
          arch: amd64
        - platform: linux/arm64
          runner: ubuntu-24.04-arm
          arch: arm64
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 60
  services:
    postgres:
      image: postgres:11
      env:
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
      ports:
        - "5432:5432"

    opensearch:
      image: opensearchproject/opensearch:2.9.0
      env:
        discovery.type: single-node
        plugins.security.ssl.http.enabled: "false"
        plugins.security.disabled: "true"
        OPENSEARCH_INITIAL_ADMIN_PASSWORD: admin
      options: >-
        --health-cmd "curl -s http://localhost:9200 >/dev/null || exit 1"
        --health-interval 15s
        --health-timeout 10s
        --health-retries 20
      ports:
        - "9200:9200"

    # NOTE: Apache Tika is intentionally NOT declared as a service container.
    # A service publishing host port 9998 would collide with the locally built
    # Tika container started in the "Start Apache Tika server" step.

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Start MinIO
      run: |
        docker run -d \
          --name minio \
          --network host \
          --health-cmd "curl -f http://localhost:9000/minio/health/live || exit 1" \
          --health-interval 10s \
          --health-timeout 5s \
          --health-retries 5 \
          -e MINIO_ROOT_USER=minio-access-key \
          -e MINIO_ROOT_PASSWORD=minio-secret-key \
          quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z \
          server /data --console-address :9001

        # Block until the liveness endpoint answers; `timeout` exits non-zero
        # after 60 seconds, which fails this step.
        echo "Waiting for MinIO to be ready..."
        timeout 60 bash -c 'until curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; do sleep 2; done'
        echo "MinIO is ready"

    - name: Create MinIO bucket
      run: |
        # MinIO readiness is already enforced by the "Start MinIO" step.

        # Detect architecture and download appropriate MinIO client
        ARCH=$(uname -m)
        if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
          MC_URL="https://dl.min.io/client/mc/release/linux-arm64/mc"
        else
          MC_URL="https://dl.min.io/client/mc/release/linux-amd64/mc"
        fi

        echo "Downloading MinIO client for architecture: $ARCH"
        # -f makes curl fail on HTTP errors instead of saving an error page
        # that would then be marked executable.
        curl -fsSLo /tmp/mc "$MC_URL"
        chmod +x /tmp/mc

        # Configure MinIO client
        /tmp/mc alias set minio http://localhost:9000 minio-access-key minio-secret-key

        # Create bucket if it doesn't exist. --ignore-existing already covers
        # the "bucket exists" case, so any other failure must fail the step.
        /tmp/mc mb minio/queridodiariobucket --ignore-existing

        echo "MinIO bucket created successfully"

    - name: Create OpenSearch index
      timeout-minutes: 2
      run: |
        echo "=== Creating OpenSearch index ==="
        echo "OpenSearch host: http://localhost:9200"
        echo "Testing connectivity..."
        # Diagnostic only: a failed probe is reported but does not abort here;
        # the init script below is what decides success.
        curl -v http://localhost:9200/_cluster/health || echo "OpenSearch not responding"
        echo "Running init script..."
        chmod +x init-scripts/opensearch/create-opensearch-index.sh
        OPENSEARCH_HOST=http://localhost:9200 \
        OPENSEARCH_USER=admin \
        OPENSEARCH_PASSWORD=admin \
        INDEX_NAME=querido-diario \
        ./init-scripts/opensearch/create-opensearch-index.sh
        echo "=== OpenSearch index creation completed ==="

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: |
          image=moby/buildkit:v0.12.5

    - name: Build test image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        load: true
        cache-from: type=gha,scope=unittest-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=unittest-${{ matrix.arch }}
        tags: test-data-processing:unittest-${{ matrix.arch }}

    - name: Build Apache Tika test image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile_apache_tika
        platforms: ${{ matrix.platform }}
        load: true
        cache-from: type=gha,scope=tika-unittest-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=tika-unittest-${{ matrix.arch }}
        tags: test-apache-tika:unittest-${{ matrix.arch }}

    - name: Start Apache Tika server
      run: |
        docker run -d \
          -p 9998:9998 \
          --name tika-server-${{ matrix.arch }} \
          test-apache-tika:unittest-${{ matrix.arch }}

        # Poll for up to ~45 seconds. Unlike a bare wait loop, running out of
        # attempts fails the step so the tests never start without Tika.
        for i in {1..15}; do
          if curl -sf http://localhost:9998/version > /dev/null 2>&1; then
            echo "✅ Apache Tika server is ready on ${{ matrix.platform }}"
            exit 0
          fi
          echo "Waiting for Tika server on ${{ matrix.platform }}... ($i/15)"
          sleep 3
        done

        echo "Apache Tika server did not become ready on ${{ matrix.platform }}"
        docker logs tika-server-${{ matrix.arch }} || true
        exit 1

    # Tests run once, under coverage, instead of once plain and once
    # instrumented: the services are stateful and the two runs used an
    # identical environment.
    - name: Run Python unit tests with coverage
      # Chosen to leave headroom for coverage instrumentation; tune as needed.
      timeout-minutes: 10
      env:
        PYTHONPATH: /mnt/code
        PYTHONUNBUFFERED: "1"
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
        POSTGRES_HOST: localhost
        POSTGRES_PORT: "5432"
        STORAGE_REGION: us-east-1
        STORAGE_ENDPOINT: http://localhost:9000
        STORAGE_ACCESS_KEY: minio-access-key
        STORAGE_ACCESS_SECRET: minio-secret-key
        STORAGE_BUCKET: queridodiariobucket
        OPENSEARCH_HOST: http://localhost:9200
        OPENSEARCH_INDEX: querido-diario
        OPENSEARCH_USER: admin
        OPENSEARCH_PASSWORD: admin
        APACHE_TIKA_SERVER: http://localhost:9998
        DEBUG: "1"
        HF_HUB_OFFLINE: "1"
        TRANSFORMERS_OFFLINE: "1"
        SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
      run: |
        # `-e NAME` without a value forwards the variable from the step's
        # environment into the container.
        docker run --rm \
          --network host \
          -e PYTHONPATH \
          -e PYTHONUNBUFFERED \
          -e POSTGRES_PASSWORD \
          -e POSTGRES_USER \
          -e POSTGRES_DB \
          -e POSTGRES_HOST \
          -e POSTGRES_PORT \
          -e STORAGE_REGION \
          -e STORAGE_ENDPOINT \
          -e STORAGE_ACCESS_KEY \
          -e STORAGE_ACCESS_SECRET \
          -e STORAGE_BUCKET \
          -e OPENSEARCH_HOST \
          -e OPENSEARCH_INDEX \
          -e OPENSEARCH_USER \
          -e OPENSEARCH_PASSWORD \
          -e APACHE_TIKA_SERVER \
          -e DEBUG \
          -e HF_HUB_OFFLINE \
          -e TRANSFORMERS_OFFLINE \
          -e SENTENCE_TRANSFORMERS_HOME \
          test-data-processing:unittest-${{ matrix.arch }} \
          bash -c "coverage run -m unittest discover -s tests -p '*.py' -v && coverage report -m"

    - name: Cleanup
      if: always()
      run: |
        # Best-effort teardown: containers may not exist if an earlier step
        # failed, so individual stop/rm errors are ignored.
        docker stop tika-server-${{ matrix.arch }} || true
        docker rm tika-server-${{ matrix.arch }} || true
        docker stop minio || true
        docker rm minio || true
        docker system prune -f
        docker image prune -af
35379 build-data-processing :
35480 name : Build data processing container image
35581 strategy :
36389 runner : ubuntu-24.04-arm
36490 arch : arm64
36591 runs-on : ${{ matrix.runner }}
366- needs : [test-multi-arch, test-python-unit ]
92+ needs : [test-multi-arch]
36793 timeout-minutes : 90
36894 steps :
36995 - name : Checkout code
0 commit comments