Skip to content

Commit 1db31c3

Browse files
committed
Improve build_container_image workflow to match other workflows
Aligned the test-python-unit job with lint_and_unit and test_pull_request:

- Added plugins.security.disabled and OPENSEARCH_INITIAL_ADMIN_PASSWORD to OpenSearch
- Replaced MinIO service with manual docker run for better control
- Added apache-tika service
- Added Python setup and dependency installation
- Added MinIO bucket creation with architecture detection
- Added OpenSearch index initialization
- Added comprehensive environment variables for tests (OpenSearch, MinIO, Tika, etc.)
- Added timeout to test execution
- Added MinIO container cleanup

These changes ensure tests run with proper service configuration and all required environment variables are passed to the container.
1 parent 3a4e74a commit 1db31c3

1 file changed

Lines changed: 173 additions & 19 deletions

File tree

.github/workflows/build_container_image.yaml

Lines changed: 173 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,8 @@ jobs:
181181
env:
182182
discovery.type: single-node
183183
plugins.security.ssl.http.enabled: false
184+
plugins.security.disabled: true
185+
OPENSEARCH_INITIAL_ADMIN_PASSWORD: admin
184186
options: >-
185187
--health-cmd "curl -s http://localhost:9200 >/dev/null || exit 1"
186188
--health-interval 15s
@@ -189,19 +191,94 @@ jobs:
189191
ports:
190192
- 9200:9200
191193

192-
minio:
193-
image: quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z
194-
env:
195-
MINIO_ACCESS_KEY: minio-access-key
196-
MINIO_SECRET_KEY: minio-secret-key
197-
MINIO_DEFAULT_BUCKETS: queridodiariobucket:public
194+
apache-tika:
195+
image: ghcr.io/${{ github.repository }}/apache-tika:latest
198196
ports:
199-
- 9000:9000
197+
- 9998:9998
200198

201199
steps:
202200
- name: Checkout code
203201
uses: actions/checkout@v4
204202

203+
- name: Start MinIO
204+
run: |
205+
docker run -d \
206+
--name minio \
207+
--network host \
208+
--health-cmd "curl -f http://localhost:9000/minio/health/live || exit 1" \
209+
--health-interval 10s \
210+
--health-timeout 5s \
211+
--health-retries 5 \
212+
-e MINIO_ROOT_USER=minio-access-key \
213+
-e MINIO_ROOT_PASSWORD=minio-secret-key \
214+
quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z \
215+
server /data --console-address :9001
216+
217+
# Wait for MinIO to be ready
218+
echo "Waiting for MinIO to be ready..."
219+
timeout 60 bash -c 'until curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; do sleep 2; done'
220+
echo "MinIO is ready"
221+
222+
- name: Set up Python
223+
uses: actions/setup-python@v5
224+
with:
225+
python-version: '3.11'
226+
cache: 'pip'
227+
cache-dependency-path: requirements.txt
228+
229+
- name: Install dependencies
230+
run: |
231+
python -m pip install --upgrade pip
232+
pip install -r requirements.txt
233+
234+
- name: Create MinIO bucket
235+
run: |
236+
# Wait for MinIO to be ready
237+
for i in {1..30}; do
238+
if curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; then
239+
echo "MinIO is ready"
240+
break
241+
fi
242+
echo "Waiting for MinIO... ($i/30)"
243+
sleep 2
244+
done
245+
246+
# Detect architecture and download appropriate MinIO client
247+
ARCH=$(uname -m)
248+
if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
249+
MC_URL="https://dl.min.io/client/mc/release/linux-arm64/mc"
250+
else
251+
MC_URL="https://dl.min.io/client/mc/release/linux-amd64/mc"
252+
fi
253+
254+
echo "Downloading MinIO client for architecture: $ARCH"
255+
curl -sLo /tmp/mc "$MC_URL"
256+
chmod +x /tmp/mc
257+
258+
# Configure MinIO client
259+
/tmp/mc alias set minio http://localhost:9000 minio-access-key minio-secret-key
260+
261+
# Create bucket if it doesn't exist
262+
/tmp/mc mb minio/queridodiariobucket --ignore-existing || true
263+
264+
echo "MinIO bucket created successfully"
265+
266+
- name: Create OpenSearch index
267+
timeout-minutes: 2
268+
run: |
269+
echo "=== Creating OpenSearch index ==="
270+
echo "OpenSearch host: http://localhost:9200"
271+
echo "Testing connectivity..."
272+
curl -v http://localhost:9200/_cluster/health || echo "OpenSearch not responding"
273+
echo "Running init script..."
274+
chmod +x init-scripts/opensearch/create-opensearch-index.sh
275+
OPENSEARCH_HOST=http://localhost:9200 \
276+
OPENSEARCH_USER=admin \
277+
OPENSEARCH_PASSWORD=admin \
278+
INDEX_NAME=querido-diario \
279+
./init-scripts/opensearch/create-opensearch-index.sh
280+
echo "=== OpenSearch index creation completed ==="
281+
205282
- name: Free up disk space
206283
run: |
207284
# Remove unnecessary packages and clean up
@@ -258,28 +335,103 @@ jobs:
258335
done
259336
260337
- name: Run Python unit tests
338+
timeout-minutes: 5
339+
env:
340+
PYTHONPATH: /mnt/code
341+
PYTHONUNBUFFERED: 1
342+
POSTGRES_PASSWORD: queridodiario
343+
POSTGRES_USER: queridodiario
344+
POSTGRES_DB: queridodiariodb
345+
POSTGRES_HOST: localhost
346+
POSTGRES_PORT: 5432
347+
STORAGE_REGION: us-east-1
348+
STORAGE_ENDPOINT: http://localhost:9000
349+
STORAGE_ACCESS_KEY: minio-access-key
350+
STORAGE_ACCESS_SECRET: minio-secret-key
351+
STORAGE_BUCKET: queridodiariobucket
352+
OPENSEARCH_HOST: http://localhost:9200
353+
OPENSEARCH_INDEX: querido-diario
354+
OPENSEARCH_USER: admin
355+
OPENSEARCH_PASSWORD: admin
356+
APACHE_TIKA_SERVER: http://localhost:9998
357+
DEBUG: 1
358+
HF_HUB_OFFLINE: 1
359+
TRANSFORMERS_OFFLINE: 1
360+
SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
261361
run: |
262362
docker run --rm \
263363
--network host \
264-
-e PYTHONPATH=/mnt/code \
265-
-e POSTGRES_PASSWORD=queridodiario \
266-
-e POSTGRES_USER=queridodiario \
267-
-e POSTGRES_DB=queridodiariodb \
268-
-e POSTGRES_HOST=localhost \
269-
-e POSTGRES_PORT=5432 \
364+
-e PYTHONPATH \
365+
-e PYTHONUNBUFFERED \
366+
-e POSTGRES_PASSWORD \
367+
-e POSTGRES_USER \
368+
-e POSTGRES_DB \
369+
-e POSTGRES_HOST \
370+
-e POSTGRES_PORT \
371+
-e STORAGE_REGION \
372+
-e STORAGE_ENDPOINT \
373+
-e STORAGE_ACCESS_KEY \
374+
-e STORAGE_ACCESS_SECRET \
375+
-e STORAGE_BUCKET \
376+
-e OPENSEARCH_HOST \
377+
-e OPENSEARCH_INDEX \
378+
-e OPENSEARCH_USER \
379+
-e OPENSEARCH_PASSWORD \
380+
-e APACHE_TIKA_SERVER \
381+
-e DEBUG \
382+
-e HF_HUB_OFFLINE \
383+
-e TRANSFORMERS_OFFLINE \
384+
-e SENTENCE_TRANSFORMERS_HOME \
270385
test-data-processing:unittest-${{ matrix.arch }} \
271386
python -m unittest discover -s tests -p "*.py" -v
272387
273388
- name: Run coverage report
389+
env:
390+
PYTHONPATH: /mnt/code
391+
PYTHONUNBUFFERED: 1
392+
POSTGRES_PASSWORD: queridodiario
393+
POSTGRES_USER: queridodiario
394+
POSTGRES_DB: queridodiariodb
395+
POSTGRES_HOST: localhost
396+
POSTGRES_PORT: 5432
397+
STORAGE_REGION: us-east-1
398+
STORAGE_ENDPOINT: http://localhost:9000
399+
STORAGE_ACCESS_KEY: minio-access-key
400+
STORAGE_ACCESS_SECRET: minio-secret-key
401+
STORAGE_BUCKET: queridodiariobucket
402+
OPENSEARCH_HOST: http://localhost:9200
403+
OPENSEARCH_INDEX: querido-diario
404+
OPENSEARCH_USER: admin
405+
OPENSEARCH_PASSWORD: admin
406+
APACHE_TIKA_SERVER: http://localhost:9998
407+
DEBUG: 1
408+
HF_HUB_OFFLINE: 1
409+
TRANSFORMERS_OFFLINE: 1
410+
SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
274411
run: |
275412
docker run --rm \
276413
--network host \
277-
-e PYTHONPATH=/mnt/code \
278-
-e POSTGRES_PASSWORD=queridodiario \
279-
-e POSTGRES_USER=queridodiario \
280-
-e POSTGRES_DB=queridodiariodb \
281-
-e POSTGRES_HOST=localhost \
282-
-e POSTGRES_PORT=5432 \
414+
-e PYTHONPATH \
415+
-e PYTHONUNBUFFERED \
416+
-e POSTGRES_PASSWORD \
417+
-e POSTGRES_USER \
418+
-e POSTGRES_DB \
419+
-e POSTGRES_HOST \
420+
-e POSTGRES_PORT \
421+
-e STORAGE_REGION \
422+
-e STORAGE_ENDPOINT \
423+
-e STORAGE_ACCESS_KEY \
424+
-e STORAGE_ACCESS_SECRET \
425+
-e STORAGE_BUCKET \
426+
-e OPENSEARCH_HOST \
427+
-e OPENSEARCH_INDEX \
428+
-e OPENSEARCH_USER \
429+
-e OPENSEARCH_PASSWORD \
430+
-e APACHE_TIKA_SERVER \
431+
-e DEBUG \
432+
-e HF_HUB_OFFLINE \
433+
-e TRANSFORMERS_OFFLINE \
434+
-e SENTENCE_TRANSFORMERS_HOME \
283435
test-data-processing:unittest-${{ matrix.arch }} \
284436
bash -c "coverage run -m unittest discover -s tests -p '*.py' && coverage report -m"
285437
@@ -288,6 +440,8 @@ jobs:
288440
run: |
289441
docker stop tika-server-${{ matrix.arch }} || true
290442
docker rm tika-server-${{ matrix.arch }} || true
443+
docker stop minio || true
444+
docker rm minio || true
291445
docker system prune -f
292446
docker image prune -af
293447

0 commit comments

Comments
 (0)