Skip to content

Remove disk cleanup and monitoring steps from workflows #69

Remove disk cleanup and monitoring steps from workflows

Remove disk cleanup and monitoring steps from workflows #69

# Workflow that builds, tests and publishes the project's container images.
name: Build and Test container images

# Runs on pushes to main, on pushes of tags matching v*, and on manual dispatch.
on:
  push:
    branches: [main]
    tags: ["v*"]
  workflow_dispatch:

# GITHUB_TOKEN scopes: read the repository contents, write packages.
permissions:
  contents: read
  packages: write

# One concurrency group per workflow and ref; a newer run cancels the one
# still in progress for the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Builds the main Dockerfile on a native runner for each architecture and
# smoke-tests that the key Python dependencies can be imported in the image.
test-multi-arch:
  name: Test on multiple architectures
  strategy:
    fail-fast: false
    matrix:
      include:
        - platform: linux/amd64
          runner: ubuntu-latest
          arch: amd64
        - platform: linux/arm64
          runner: ubuntu-24.04-arm
          arch: arm64
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 45
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: |
          image=moby/buildkit:v0.12.5
    - name: Build test image for ${{ matrix.platform }}
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        # Always load the result into the local Docker daemon: the next step
        # runs `docker run` against this tag on every runner, including arm64.
        load: true
        cache-from: type=gha,scope=test-${{ matrix.platform }}
        cache-to: type=gha,mode=min,scope=test-${{ matrix.platform }}
        tags: test-data-processing:${{ matrix.arch }}
    # Run functional tests on both architectures with native execution
    - name: Test Python dependencies
      run: |
        docker run --rm test-data-processing:${{ matrix.arch }} python -c "
        import sentence_transformers
        import psycopg2
        import opensearchpy
        import boto3
        import sklearn
        import numpy
        print('✅ All major dependencies imported successfully on ${{ matrix.platform }}')
        print('✅ sentence-transformers version:', sentence_transformers.__version__)
        print('✅ psycopg2 version:', psycopg2.__version__)
        print('✅ opensearchpy version:', opensearchpy.__version__)
        print('✅ boto3 version:', boto3.__version__)
        print('✅ scikit-learn version:', sklearn.__version__)
        print('✅ numpy version:', numpy.__version__)
        "
    # Runs even after a failure so the runner's Docker storage is released.
    - name: Clean up test images
      if: always()
      run: |
        docker system prune -f
        docker image prune -af
# Runs the Python unit tests (and a coverage pass) inside the freshly built
# image on each architecture. The tests reach PostgreSQL, OpenSearch, MinIO
# and Apache Tika through the runner's localhost (containers use host network).
test-python-unit:
  name: Run Python unit tests
  strategy:
    fail-fast: false
    matrix:
      include:
        - platform: linux/amd64
          runner: ubuntu-latest
          arch: amd64
        - platform: linux/arm64
          runner: ubuntu-24.04-arm
          arch: arm64
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 60
  services:
    postgres:
      image: postgres:11
      env:
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
      ports:
        - "5432:5432"
    opensearch:
      image: opensearchproject/opensearch:2.9.0
      env:
        discovery.type: single-node
        plugins.security.ssl.http.enabled: "false"
        plugins.security.disabled: "true"
        OPENSEARCH_INITIAL_ADMIN_PASSWORD: admin
      options: >-
        --health-cmd "curl -s http://localhost:9200 >/dev/null || exit 1"
        --health-interval 15s
        --health-timeout 10s
        --health-retries 20
      ports:
        - "9200:9200"
    # Apache Tika is intentionally NOT declared as a service container: the
    # "Start Apache Tika server" step below publishes host port 9998 for the
    # image built from this commit, and a service bound to the same port would
    # make that `docker run -p 9998:9998` fail.
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Start MinIO
      run: |
        docker run -d \
          --name minio \
          --network host \
          --health-cmd "curl -f http://localhost:9000/minio/health/live || exit 1" \
          --health-interval 10s \
          --health-timeout 5s \
          --health-retries 5 \
          -e MINIO_ROOT_USER=minio-access-key \
          -e MINIO_ROOT_PASSWORD=minio-secret-key \
          quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z \
          server /data --console-address :9001
        # Wait for MinIO to be ready; `timeout` fails the step after 60s
        echo "Waiting for MinIO to be ready..."
        timeout 60 bash -c 'until curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; do sleep 2; done'
        echo "MinIO is ready"
    - name: Create MinIO bucket
      run: |
        # MinIO readiness is already enforced by the previous step.
        # Detect architecture and download appropriate MinIO client
        ARCH=$(uname -m)
        if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
          MC_URL="https://dl.min.io/client/mc/release/linux-arm64/mc"
        else
          MC_URL="https://dl.min.io/client/mc/release/linux-amd64/mc"
        fi
        echo "Downloading MinIO client for architecture: $ARCH"
        # -f: fail on HTTP errors instead of saving an error page as the binary
        curl -fsSLo /tmp/mc "$MC_URL"
        chmod +x /tmp/mc
        # Configure MinIO client
        /tmp/mc alias set minio http://localhost:9000 minio-access-key minio-secret-key
        # Create bucket if it doesn't exist; any other failure fails the step
        /tmp/mc mb minio/queridodiariobucket --ignore-existing
        echo "MinIO bucket created successfully"
    - name: Create OpenSearch index
      timeout-minutes: 2
      run: |
        echo "=== Creating OpenSearch index ==="
        echo "OpenSearch host: http://localhost:9200"
        echo "Testing connectivity..."
        # Diagnostic only: a failed probe is reported but does not stop the step
        curl -v http://localhost:9200/_cluster/health || echo "OpenSearch not responding"
        echo "Running init script..."
        chmod +x init-scripts/opensearch/create-opensearch-index.sh
        OPENSEARCH_HOST=http://localhost:9200 \
        OPENSEARCH_USER=admin \
        OPENSEARCH_PASSWORD=admin \
        INDEX_NAME=querido-diario \
        ./init-scripts/opensearch/create-opensearch-index.sh
        echo "=== OpenSearch index creation completed ==="
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: |
          image=moby/buildkit:v0.12.5
    - name: Build test image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        load: true
        cache-from: type=gha,scope=unittest-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=unittest-${{ matrix.arch }}
        tags: test-data-processing:unittest-${{ matrix.arch }}
    - name: Build Apache Tika test image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile_apache_tika
        platforms: ${{ matrix.platform }}
        load: true
        cache-from: type=gha,scope=tika-unittest-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=tika-unittest-${{ matrix.arch }}
        tags: test-apache-tika:unittest-${{ matrix.arch }}
    - name: Start Apache Tika server
      run: |
        docker run -d -p 9998:9998 --name tika-server-${{ matrix.arch }} test-apache-tika:unittest-${{ matrix.arch }}
        sleep 15
        # Verify Tika is running; fail the step if it never answers
        ready=false
        for i in {1..6}; do
          if curl -f http://localhost:9998/version > /dev/null 2>&1; then
            echo "✅ Apache Tika server is ready on ${{ matrix.platform }}"
            ready=true
            break
          fi
          echo "Waiting for Tika server on ${{ matrix.platform }}... ($i/6)"
          sleep 5
        done
        if [ "$ready" != "true" ]; then
          echo "::error::Apache Tika server did not become ready on ${{ matrix.platform }}"
          docker logs tika-server-${{ matrix.arch }} || true
          exit 1
        fi
    - name: Run Python unit tests
      timeout-minutes: 5
      # Values are defined once here and forwarded by name with `-e VAR`.
      env:
        PYTHONPATH: /mnt/code
        PYTHONUNBUFFERED: "1"
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
        POSTGRES_HOST: localhost
        POSTGRES_PORT: "5432"
        STORAGE_REGION: us-east-1
        STORAGE_ENDPOINT: http://localhost:9000
        STORAGE_ACCESS_KEY: minio-access-key
        STORAGE_ACCESS_SECRET: minio-secret-key
        STORAGE_BUCKET: queridodiariobucket
        OPENSEARCH_HOST: http://localhost:9200
        OPENSEARCH_INDEX: querido-diario
        OPENSEARCH_USER: admin
        OPENSEARCH_PASSWORD: admin
        APACHE_TIKA_SERVER: http://localhost:9998
        DEBUG: "1"
        HF_HUB_OFFLINE: "1"
        TRANSFORMERS_OFFLINE: "1"
        SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
      run: |
        docker run --rm \
          --network host \
          -e PYTHONPATH \
          -e PYTHONUNBUFFERED \
          -e POSTGRES_PASSWORD \
          -e POSTGRES_USER \
          -e POSTGRES_DB \
          -e POSTGRES_HOST \
          -e POSTGRES_PORT \
          -e STORAGE_REGION \
          -e STORAGE_ENDPOINT \
          -e STORAGE_ACCESS_KEY \
          -e STORAGE_ACCESS_SECRET \
          -e STORAGE_BUCKET \
          -e OPENSEARCH_HOST \
          -e OPENSEARCH_INDEX \
          -e OPENSEARCH_USER \
          -e OPENSEARCH_PASSWORD \
          -e APACHE_TIKA_SERVER \
          -e DEBUG \
          -e HF_HUB_OFFLINE \
          -e TRANSFORMERS_OFFLINE \
          -e SENTENCE_TRANSFORMERS_HOME \
          test-data-processing:unittest-${{ matrix.arch }} \
          python -m unittest discover -s tests -p "*.py" -v
    - name: Run coverage report
      # Same environment as the unit-test step; the suite is re-run under
      # `coverage` and the line report is printed afterwards.
      env:
        PYTHONPATH: /mnt/code
        PYTHONUNBUFFERED: "1"
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
        POSTGRES_HOST: localhost
        POSTGRES_PORT: "5432"
        STORAGE_REGION: us-east-1
        STORAGE_ENDPOINT: http://localhost:9000
        STORAGE_ACCESS_KEY: minio-access-key
        STORAGE_ACCESS_SECRET: minio-secret-key
        STORAGE_BUCKET: queridodiariobucket
        OPENSEARCH_HOST: http://localhost:9200
        OPENSEARCH_INDEX: querido-diario
        OPENSEARCH_USER: admin
        OPENSEARCH_PASSWORD: admin
        APACHE_TIKA_SERVER: http://localhost:9998
        DEBUG: "1"
        HF_HUB_OFFLINE: "1"
        TRANSFORMERS_OFFLINE: "1"
        SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
      run: |
        docker run --rm \
          --network host \
          -e PYTHONPATH \
          -e PYTHONUNBUFFERED \
          -e POSTGRES_PASSWORD \
          -e POSTGRES_USER \
          -e POSTGRES_DB \
          -e POSTGRES_HOST \
          -e POSTGRES_PORT \
          -e STORAGE_REGION \
          -e STORAGE_ENDPOINT \
          -e STORAGE_ACCESS_KEY \
          -e STORAGE_ACCESS_SECRET \
          -e STORAGE_BUCKET \
          -e OPENSEARCH_HOST \
          -e OPENSEARCH_INDEX \
          -e OPENSEARCH_USER \
          -e OPENSEARCH_PASSWORD \
          -e APACHE_TIKA_SERVER \
          -e DEBUG \
          -e HF_HUB_OFFLINE \
          -e TRANSFORMERS_OFFLINE \
          -e SENTENCE_TRANSFORMERS_HOME \
          test-data-processing:unittest-${{ matrix.arch }} \
          bash -c "coverage run -m unittest discover -s tests -p '*.py' && coverage report -m"
    # Best-effort teardown: runs even after failures, and every stop/rm
    # tolerates a container that was never started.
    - name: Cleanup
      if: always()
      run: |
        docker stop tika-server-${{ matrix.arch }} || true
        docker rm tika-server-${{ matrix.arch }} || true
        docker stop minio || true
        docker rm minio || true
        docker system prune -f
        docker image prune -af
# Builds the main Dockerfile on a native runner per architecture and pushes a
# single-arch image to ghcr.io, tagged with an -amd64 / -arm64 suffix.
# Starts only after both test jobs have succeeded.
build-data-processing:
  name: Build data processing container image
  needs:
    - test-multi-arch
    - test-python-unit
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 90
  strategy:
    fail-fast: false
    matrix:
      include:
        - arch: amd64
          platform: linux/amd64
          runner: ubuntu-latest
        - arch: arm64
          platform: linux/arm64
          runner: ubuntu-24.04-arm
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: image=moby/buildkit:v0.12.5

    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.repository_owner }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Branch refs publish the rolling latest-<arch> image.
    # NOTE(review): the condition matches any branch ref, so a manual dispatch
    # from a non-main branch would also publish latest-<arch> — confirm that
    # this is intended.
    - name: Build and push development container image
      if: startsWith(github.ref, 'refs/heads/')
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        push: true
        tags: ghcr.io/${{ github.repository }}:latest-${{ matrix.arch }}
        cache-from: type=gha,scope=main-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=main-${{ matrix.arch }}

    # Tag refs publish <tag>-<arch>, using a cache scope separate from branches.
    - name: Build and push tagged container image
      if: startsWith(github.ref, 'refs/tags/')
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        push: true
        tags: ghcr.io/${{ github.repository }}:${{ github.ref_name }}-${{ matrix.arch }}
        cache-from: type=gha,scope=tag-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=tag-${{ matrix.arch }}
# Combines the per-architecture images pushed by build-data-processing into a
# single multi-arch manifest: `latest` for branch refs, `<tag>` for tag refs.
#
# Repository and ref names reach the shell through `env` rather than being
# expanded with ${{ }} inside the script, so a crafted tag name cannot inject
# shell commands.
create-data-processing-manifest:
  name: Create data processing multi-arch manifest
  runs-on: ubuntu-latest
  needs: build-data-processing
  timeout-minutes: 15
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.repository_owner }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Verify single-arch images availability (branch)
      if: ${{ startsWith(github.ref, 'refs/heads/') }}
      env:
        REPO: ${{ github.repository }}
      run: |
        # Poll each image up to 20 times; fail if it never becomes visible
        for tag in latest-amd64 latest-arm64; do
          found=false
          for i in {1..20}; do
            if docker buildx imagetools inspect "ghcr.io/$REPO:$tag" > /dev/null 2>&1; then
              echo "Found ghcr.io/$REPO:$tag"
              found=true
              break
            fi
            echo "Waiting for ghcr.io/$REPO:$tag to be available ($i/20)..."
            sleep 3
          done
          if [ "$found" != "true" ]; then
            echo "::error::ghcr.io/$REPO:$tag did not become available"
            exit 1
          fi
        done
    - name: Create and push development manifest
      if: ${{ startsWith(github.ref, 'refs/heads/') }}
      env:
        REPO: ${{ github.repository }}
      run: |
        docker buildx imagetools create \
          -t "ghcr.io/$REPO:latest" \
          "ghcr.io/$REPO:latest-amd64" \
          "ghcr.io/$REPO:latest-arm64"
    - name: Verify single-arch images availability (tag)
      if: ${{ startsWith(github.ref, 'refs/tags/') }}
      env:
        REPO: ${{ github.repository }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        # Poll each image up to 20 times; fail if it never becomes visible
        for arch in amd64 arm64; do
          image="ghcr.io/$REPO:$REF_NAME-$arch"
          found=false
          for i in {1..20}; do
            if docker buildx imagetools inspect "$image" > /dev/null 2>&1; then
              echo "Found $image"
              found=true
              break
            fi
            echo "Waiting for $image to be available ($i/20)..."
            sleep 3
          done
          if [ "$found" != "true" ]; then
            echo "::error::$image did not become available"
            exit 1
          fi
        done
    - name: Create and push tagged manifest
      if: ${{ startsWith(github.ref, 'refs/tags/') }}
      env:
        REPO: ${{ github.repository }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        docker buildx imagetools create \
          -t "ghcr.io/$REPO:$REF_NAME" \
          "ghcr.io/$REPO:$REF_NAME-amd64" \
          "ghcr.io/$REPO:$REF_NAME-arm64"