@@ -181,6 +181,8 @@ jobs:
181181 env :
182182 discovery.type : single-node
183183 plugins.security.ssl.http.enabled : false
184+ plugins.security.disabled : true
185+ OPENSEARCH_INITIAL_ADMIN_PASSWORD : admin
184186 options : >-
185187 --health-cmd "curl -s http://localhost:9200 >/dev/null || exit 1"
186188 --health-interval 15s
@@ -189,19 +191,94 @@ jobs:
189191 ports :
190192 - 9200:9200
191193
# Apache Tika service container. The test steps reach it through
# APACHE_TIKA_SERVER=http://localhost:9998.
# MinIO is no longer declared as a service container; it is launched by the
# "Start MinIO" step instead.
apache-tika:
  image: ghcr.io/${{ github.repository }}/apache-tika:latest
  ports:
    - '9998:9998'
200198
201199 steps :
# Check out the repository into the runner workspace so later steps can
# read requirements.txt and the init-scripts directory.
- name: Checkout code
  uses: actions/checkout@v4
204202
# Launch MinIO as a detached container on the host network and block until
# its liveness endpoint answers on localhost:9000.
# NOTE(review): presumably run as a step rather than a service container
# because the image needs the `server /data` command arguments - confirm.
- name: Start MinIO
  run: |
    # Remove any container left over from an earlier run so that the fixed
    # name "minio" cannot collide with it.
    docker rm -f minio > /dev/null 2>&1 || true

    docker run -d \
      --name minio \
      --network host \
      --health-cmd "curl -f http://localhost:9000/minio/health/live || exit 1" \
      --health-interval 10s \
      --health-timeout 5s \
      --health-retries 5 \
      -e MINIO_ROOT_USER=minio-access-key \
      -e MINIO_ROOT_PASSWORD=minio-secret-key \
      quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z \
      server /data --console-address :9001

    # Wait for MinIO to be ready (polled from the runner, at most 60s).
    echo "Waiting for MinIO to be ready..."
    if ! timeout 60 bash -c 'until curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; do sleep 2; done'; then
      # Surface the container output so a startup failure can be diagnosed
      # from the job log instead of a bare timeout exit code.
      echo "MinIO did not become ready within 60 seconds" >&2
      docker logs minio || true
      exit 1
    fi
    echo "MinIO is ready"
221+
# Install Python 3.11 on the runner and enable the pip download cache,
# keyed on requirements.txt.
- name: Set up Python
  uses: actions/setup-python@v5
  with:
    # Quoted so the version stays a string; no padding inside the quotes,
    # otherwise the value is not the literal version / cache name.
    python-version: '3.11'
    cache: 'pip'
    cache-dependency-path: requirements.txt
228+
# Install the project's Python requirements on the runner itself.
- name: Install dependencies
  run: |
    # Upgrade pip first, then install everything listed in requirements.txt.
    python -m pip install --upgrade pip
    pip install -r requirements.txt
233+
# Download the MinIO client (mc) matching the runner architecture and use it
# to create the bucket named by STORAGE_BUCKET in the test steps.
- name: Create MinIO bucket
  run: |
    # Wait for MinIO to be ready; fail the step if it never answers instead
    # of falling through to the client commands.
    minio_ready=false
    for i in {1..30}; do
      if curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; then
        echo "MinIO is ready"
        minio_ready=true
        break
      fi
      echo "Waiting for MinIO... ($i/30)"
      sleep 2
    done
    if [ "$minio_ready" != "true" ]; then
      echo "MinIO did not become ready after 30 attempts" >&2
      exit 1
    fi

    # Detect architecture and download appropriate MinIO client
    ARCH=$(uname -m)
    if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
      MC_URL="https://dl.min.io/client/mc/release/linux-arm64/mc"
    else
      MC_URL="https://dl.min.io/client/mc/release/linux-amd64/mc"
    fi

    echo "Downloading MinIO client for architecture: $ARCH"
    # -f makes curl fail on an HTTP error instead of saving the error page
    # as /tmp/mc; -S still prints the reason while -s hides the progress bar.
    curl -fsSLo /tmp/mc "$MC_URL"
    chmod +x /tmp/mc

    # Configure MinIO client
    /tmp/mc alias set minio http://localhost:9000 minio-access-key minio-secret-key

    # Create bucket if it doesn't exist. --ignore-existing already tolerates
    # a pre-existing bucket, so any other failure must fail the step rather
    # than be masked before the success message below.
    /tmp/mc mb --ignore-existing minio/queridodiariobucket

    echo "MinIO bucket created successfully"
265+
# Probe the OpenSearch cluster, then run the repository's index-creation
# script against it. The whole step is capped at two minutes.
- name: Create OpenSearch index
  timeout-minutes: 2
  env:
    # Connection settings read by create-opensearch-index.sh.
    OPENSEARCH_HOST: http://localhost:9200
    OPENSEARCH_USER: admin
    OPENSEARCH_PASSWORD: admin
    INDEX_NAME: querido-diario
  run: |
    echo "=== Creating OpenSearch index ==="
    echo "OpenSearch host: http://localhost:9200"
    echo "Testing connectivity..."
    # Diagnostic only: a failed probe is reported but does not stop the step.
    curl -v http://localhost:9200/_cluster/health || echo "OpenSearch not responding"
    echo "Running init script..."
    chmod +x init-scripts/opensearch/create-opensearch-index.sh
    ./init-scripts/opensearch/create-opensearch-index.sh
    echo "=== OpenSearch index creation completed ==="
281+
205282 - name : Free up disk space
206283 run : |
207284 # Remove unnecessary packages and clean up
@@ -258,28 +335,103 @@ jobs:
258335 done
259336
# Run the unittest suite inside the per-architecture test image. The
# container shares the host network, so every backing service is addressed
# via localhost. Settings are declared once in `env:` and forwarded by name.
- name: Run Python unit tests
  timeout-minutes: 5
  env:
    PYTHONPATH: /mnt/code
    PYTHONUNBUFFERED: '1'
    # PostgreSQL connection
    POSTGRES_PASSWORD: queridodiario
    POSTGRES_USER: queridodiario
    POSTGRES_DB: queridodiariodb
    POSTGRES_HOST: localhost
    POSTGRES_PORT: '5432'
    # Object storage (MinIO)
    STORAGE_REGION: us-east-1
    STORAGE_ENDPOINT: http://localhost:9000
    STORAGE_ACCESS_KEY: minio-access-key
    STORAGE_ACCESS_SECRET: minio-secret-key
    STORAGE_BUCKET: queridodiariobucket
    # OpenSearch
    OPENSEARCH_HOST: http://localhost:9200
    OPENSEARCH_INDEX: querido-diario
    OPENSEARCH_USER: admin
    OPENSEARCH_PASSWORD: admin
    # Apache Tika
    APACHE_TIKA_SERVER: http://localhost:9998
    DEBUG: '1'
    # Hugging Face / sentence-transformers settings
    HF_HUB_OFFLINE: '1'
    TRANSFORMERS_OFFLINE: '1'
    SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
  run: |
    # `--env NAME` without a value copies NAME from this step's environment.
    docker run --rm \
      --network host \
      --env PYTHONPATH \
      --env PYTHONUNBUFFERED \
      --env POSTGRES_PASSWORD \
      --env POSTGRES_USER \
      --env POSTGRES_DB \
      --env POSTGRES_HOST \
      --env POSTGRES_PORT \
      --env STORAGE_REGION \
      --env STORAGE_ENDPOINT \
      --env STORAGE_ACCESS_KEY \
      --env STORAGE_ACCESS_SECRET \
      --env STORAGE_BUCKET \
      --env OPENSEARCH_HOST \
      --env OPENSEARCH_INDEX \
      --env OPENSEARCH_USER \
      --env OPENSEARCH_PASSWORD \
      --env APACHE_TIKA_SERVER \
      --env DEBUG \
      --env HF_HUB_OFFLINE \
      --env TRANSFORMERS_OFFLINE \
      --env SENTENCE_TRANSFORMERS_HOME \
      test-data-processing:unittest-${{ matrix.arch }} \
      python -m unittest discover -s tests -p "*.py" -v
272387
# Re-run the unittest suite under coverage inside the per-architecture test
# image and print the line-by-line coverage report. The container shares the
# host network, so every backing service is addressed via localhost.
- name: Run coverage report
  # Bounded like the unit-test step so a hung test cannot hold the job until
  # the default job timeout. Set higher than that step's 5 minutes to leave
  # room for coverage tracing overhead.
  timeout-minutes: 10
  env:
    PYTHONPATH: /mnt/code
    PYTHONUNBUFFERED: '1'
    # PostgreSQL connection
    POSTGRES_PASSWORD: queridodiario
    POSTGRES_USER: queridodiario
    POSTGRES_DB: queridodiariodb
    POSTGRES_HOST: localhost
    POSTGRES_PORT: '5432'
    # Object storage (MinIO)
    STORAGE_REGION: us-east-1
    STORAGE_ENDPOINT: http://localhost:9000
    STORAGE_ACCESS_KEY: minio-access-key
    STORAGE_ACCESS_SECRET: minio-secret-key
    STORAGE_BUCKET: queridodiariobucket
    # OpenSearch
    OPENSEARCH_HOST: http://localhost:9200
    OPENSEARCH_INDEX: querido-diario
    OPENSEARCH_USER: admin
    OPENSEARCH_PASSWORD: admin
    # Apache Tika
    APACHE_TIKA_SERVER: http://localhost:9998
    DEBUG: '1'
    # Hugging Face / sentence-transformers settings
    HF_HUB_OFFLINE: '1'
    TRANSFORMERS_OFFLINE: '1'
    SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
  run: |
    # `--env NAME` without a value copies NAME from this step's environment.
    docker run --rm \
      --network host \
      --env PYTHONPATH \
      --env PYTHONUNBUFFERED \
      --env POSTGRES_PASSWORD \
      --env POSTGRES_USER \
      --env POSTGRES_DB \
      --env POSTGRES_HOST \
      --env POSTGRES_PORT \
      --env STORAGE_REGION \
      --env STORAGE_ENDPOINT \
      --env STORAGE_ACCESS_KEY \
      --env STORAGE_ACCESS_SECRET \
      --env STORAGE_BUCKET \
      --env OPENSEARCH_HOST \
      --env OPENSEARCH_INDEX \
      --env OPENSEARCH_USER \
      --env OPENSEARCH_PASSWORD \
      --env APACHE_TIKA_SERVER \
      --env DEBUG \
      --env HF_HUB_OFFLINE \
      --env TRANSFORMERS_OFFLINE \
      --env SENTENCE_TRANSFORMERS_HOME \
      test-data-processing:unittest-${{ matrix.arch }} \
      bash -c "coverage run -m unittest discover -s tests -p '*.py' && coverage report -m"
285437
@@ -288,6 +440,8 @@ jobs:
288440 run : |
289441 docker stop tika-server-${{ matrix.arch }} || true
290442 docker rm tika-server-${{ matrix.arch }} || true
443+ docker stop minio || true
444+ docker rm minio || true
291445 docker system prune -f
292446 docker image prune -af
293447
0 commit comments