Remove disk cleanup and monitoring steps from workflows #69
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: build, test, and publish the container images.
name: Build and Test container images

# Triggers: pushes to main, pushes of tags matching v*, and manual dispatch.
on:
  workflow_dispatch:
  push:
    tags: ["v*"]
    branches: [main]

# Token scopes granted to every job: read the repository, write packages.
permissions:
  packages: write
  contents: read

# One concurrency group per workflow + ref; a newer run in the same group
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
| jobs: | |
# Smoke test: build the image for each matrix platform on its own runner and
# check that the main Python dependencies can be imported inside it.
test-multi-arch:
  name: Test on multiple architectures
  strategy:
    fail-fast: false
    matrix:
      include:
        - platform: linux/amd64
          runner: ubuntu-latest
        - platform: linux/arm64
          runner: ubuntu-24.04-arm
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 45
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: |
          image=moby/buildkit:v0.12.5
    - name: Build test image for ${{ matrix.platform }}
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        # Load the result into the local Docker image store on every
        # platform: the next step starts it with `docker run`, which cannot
        # see an image that only exists inside the Buildx builder.
        load: true
        cache-from: type=gha,scope=test-${{ matrix.platform }}
        cache-to: type=gha,mode=min,scope=test-${{ matrix.platform }}
        tags: |
          test-data-processing:${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }}
    # Run functional tests on both architectures with native execution
    - name: Test Python dependencies
      run: |
        docker run --rm test-data-processing:${{ matrix.platform == 'linux/amd64' && 'amd64' || 'arm64' }} python -c "
        import sentence_transformers
        import psycopg2
        import opensearchpy
        import boto3
        import sklearn
        import numpy
        print('✅ All major dependencies imported successfully on ${{ matrix.platform }}')
        print('✅ sentence-transformers version:', sentence_transformers.__version__)
        print('✅ psycopg2 version:', psycopg2.__version__)
        print('✅ opensearchpy version:', opensearchpy.__version__)
        print('✅ boto3 version:', boto3.__version__)
        print('✅ scikit-learn version:', sklearn.__version__)
        print('✅ numpy version:', numpy.__version__)
        "
    # Always reclaim disk space, even when an earlier step failed.
    - name: Clean up test images
      if: always()
      run: |
        docker system prune -f
        docker image prune -af
# Unit tests: run the Python test suite inside the freshly built image on each
# matrix platform, against PostgreSQL, OpenSearch, MinIO and Apache Tika
# reachable on the runner's localhost.
test-python-unit:
  name: Run Python unit tests
  strategy:
    fail-fast: false
    matrix:
      include:
        - platform: linux/amd64
          runner: ubuntu-latest
          arch: amd64
        - platform: linux/arm64
          runner: ubuntu-24.04-arm
          arch: arm64
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 60
  # NOTE: Apache Tika is intentionally NOT declared as a service here. The
  # steps below build it from Dockerfile_apache_tika and publish it on host
  # port 9998; a service container bound to the same host port would make
  # that `docker run -p 9998:9998` fail with a port conflict.
  services:
    postgres:
      image: postgres:11
      env:
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
      ports:
        - 5432:5432
    opensearch:
      image: opensearchproject/opensearch:2.9.0
      env:
        discovery.type: single-node
        # Quoted so the values are unambiguously strings in the environment.
        plugins.security.ssl.http.enabled: "false"
        plugins.security.disabled: "true"
        OPENSEARCH_INITIAL_ADMIN_PASSWORD: admin
      options: >-
        --health-cmd "curl -s http://localhost:9200 >/dev/null || exit 1"
        --health-interval 15s
        --health-timeout 10s
        --health-retries 20
      ports:
        - 9200:9200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Start MinIO
      run: |
        docker run -d \
          --name minio \
          --network host \
          --health-cmd "curl -f http://localhost:9000/minio/health/live || exit 1" \
          --health-interval 10s \
          --health-timeout 5s \
          --health-retries 5 \
          -e MINIO_ROOT_USER=minio-access-key \
          -e MINIO_ROOT_PASSWORD=minio-secret-key \
          quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z \
          server /data --console-address :9001
        # Wait for MinIO to be ready; `timeout` fails the step after 60s.
        echo "Waiting for MinIO to be ready..."
        timeout 60 bash -c 'until curl -sf http://localhost:9000/minio/health/live > /dev/null 2>&1; do sleep 2; done'
        echo "MinIO is ready"
    - name: Create MinIO bucket
      run: |
        # MinIO readiness is already enforced by the "Start MinIO" step.
        # Detect architecture and download appropriate MinIO client
        ARCH=$(uname -m)
        if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
          MC_URL="https://dl.min.io/client/mc/release/linux-arm64/mc"
        else
          MC_URL="https://dl.min.io/client/mc/release/linux-amd64/mc"
        fi
        echo "Downloading MinIO client for architecture: $ARCH"
        # -f: fail on HTTP errors instead of saving an error page as the binary.
        curl -fsSLo /tmp/mc "$MC_URL"
        chmod +x /tmp/mc
        # Configure MinIO client
        /tmp/mc alias set minio http://localhost:9000 minio-access-key minio-secret-key
        # Create bucket if it doesn't exist. --ignore-existing covers the
        # "already there" case, so any other failure must fail the step.
        /tmp/mc mb minio/queridodiariobucket --ignore-existing
        echo "MinIO bucket created successfully"
    - name: Create OpenSearch index
      timeout-minutes: 2
      run: |
        echo "=== Creating OpenSearch index ==="
        echo "OpenSearch host: http://localhost:9200"
        echo "Testing connectivity..."
        # Diagnostic only: a failure here is reported but does not stop the step.
        curl -v http://localhost:9200/_cluster/health || echo "OpenSearch not responding"
        echo "Running init script..."
        chmod +x init-scripts/opensearch/create-opensearch-index.sh
        OPENSEARCH_HOST=http://localhost:9200 \
        OPENSEARCH_USER=admin \
        OPENSEARCH_PASSWORD=admin \
        INDEX_NAME=querido-diario \
        ./init-scripts/opensearch/create-opensearch-index.sh
        echo "=== OpenSearch index creation completed ==="
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: |
          image=moby/buildkit:v0.12.5
    - name: Build test image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        load: true
        cache-from: type=gha,scope=unittest-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=unittest-${{ matrix.arch }}
        tags: test-data-processing:unittest-${{ matrix.arch }}
    - name: Build Apache Tika test image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile_apache_tika
        platforms: ${{ matrix.platform }}
        load: true
        cache-from: type=gha,scope=tika-unittest-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=tika-unittest-${{ matrix.arch }}
        tags: test-apache-tika:unittest-${{ matrix.arch }}
    - name: Start Apache Tika server
      run: |
        docker run -d -p 9998:9998 --name tika-server-${{ matrix.arch }} test-apache-tika:unittest-${{ matrix.arch }}
        # Poll until Tika answers; fail the step if it never does, so the
        # tests do not start against a server that is not there.
        for i in {1..30}; do
          if curl -sf http://localhost:9998/version > /dev/null 2>&1; then
            echo "✅ Apache Tika server is ready on ${{ matrix.platform }}"
            exit 0
          fi
          echo "Waiting for Tika server on ${{ matrix.platform }}... ($i/30)"
          sleep 2
        done
        echo "::error::Apache Tika server did not become ready on ${{ matrix.platform }}"
        docker logs tika-server-${{ matrix.arch }} || true
        exit 1
    - name: Run Python unit tests
      timeout-minutes: 5
      env:
        PYTHONPATH: /mnt/code
        PYTHONUNBUFFERED: "1"
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
        POSTGRES_HOST: localhost
        POSTGRES_PORT: "5432"
        STORAGE_REGION: us-east-1
        STORAGE_ENDPOINT: http://localhost:9000
        STORAGE_ACCESS_KEY: minio-access-key
        STORAGE_ACCESS_SECRET: minio-secret-key
        STORAGE_BUCKET: queridodiariobucket
        OPENSEARCH_HOST: http://localhost:9200
        OPENSEARCH_INDEX: querido-diario
        OPENSEARCH_USER: admin
        OPENSEARCH_PASSWORD: admin
        APACHE_TIKA_SERVER: http://localhost:9998
        DEBUG: "1"
        HF_HUB_OFFLINE: "1"
        TRANSFORMERS_OFFLINE: "1"
        SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
      # `-e NAME` without a value forwards the step's env var into the
      # container; host networking lets the tests reach the localhost services.
      run: |
        docker run --rm \
          --network host \
          -e PYTHONPATH \
          -e PYTHONUNBUFFERED \
          -e POSTGRES_PASSWORD \
          -e POSTGRES_USER \
          -e POSTGRES_DB \
          -e POSTGRES_HOST \
          -e POSTGRES_PORT \
          -e STORAGE_REGION \
          -e STORAGE_ENDPOINT \
          -e STORAGE_ACCESS_KEY \
          -e STORAGE_ACCESS_SECRET \
          -e STORAGE_BUCKET \
          -e OPENSEARCH_HOST \
          -e OPENSEARCH_INDEX \
          -e OPENSEARCH_USER \
          -e OPENSEARCH_PASSWORD \
          -e APACHE_TIKA_SERVER \
          -e DEBUG \
          -e HF_HUB_OFFLINE \
          -e TRANSFORMERS_OFFLINE \
          -e SENTENCE_TRANSFORMERS_HOME \
          test-data-processing:unittest-${{ matrix.arch }} \
          python -m unittest discover -s tests -p "*.py" -v
    # Runs the same test discovery again under `coverage` and prints the
    # line-by-line report.
    - name: Run coverage report
      env:
        PYTHONPATH: /mnt/code
        PYTHONUNBUFFERED: "1"
        POSTGRES_PASSWORD: queridodiario
        POSTGRES_USER: queridodiario
        POSTGRES_DB: queridodiariodb
        POSTGRES_HOST: localhost
        POSTGRES_PORT: "5432"
        STORAGE_REGION: us-east-1
        STORAGE_ENDPOINT: http://localhost:9000
        STORAGE_ACCESS_KEY: minio-access-key
        STORAGE_ACCESS_SECRET: minio-secret-key
        STORAGE_BUCKET: queridodiariobucket
        OPENSEARCH_HOST: http://localhost:9200
        OPENSEARCH_INDEX: querido-diario
        OPENSEARCH_USER: admin
        OPENSEARCH_PASSWORD: admin
        APACHE_TIKA_SERVER: http://localhost:9998
        DEBUG: "1"
        HF_HUB_OFFLINE: "1"
        TRANSFORMERS_OFFLINE: "1"
        SENTENCE_TRANSFORMERS_HOME: /tmp/sentence_transformers
      run: |
        docker run --rm \
          --network host \
          -e PYTHONPATH \
          -e PYTHONUNBUFFERED \
          -e POSTGRES_PASSWORD \
          -e POSTGRES_USER \
          -e POSTGRES_DB \
          -e POSTGRES_HOST \
          -e POSTGRES_PORT \
          -e STORAGE_REGION \
          -e STORAGE_ENDPOINT \
          -e STORAGE_ACCESS_KEY \
          -e STORAGE_ACCESS_SECRET \
          -e STORAGE_BUCKET \
          -e OPENSEARCH_HOST \
          -e OPENSEARCH_INDEX \
          -e OPENSEARCH_USER \
          -e OPENSEARCH_PASSWORD \
          -e APACHE_TIKA_SERVER \
          -e DEBUG \
          -e HF_HUB_OFFLINE \
          -e TRANSFORMERS_OFFLINE \
          -e SENTENCE_TRANSFORMERS_HOME \
          test-data-processing:unittest-${{ matrix.arch }} \
          bash -c "coverage run -m unittest discover -s tests -p '*.py' && coverage report -m"
    # Best-effort teardown: `|| true` keeps going when a container was never
    # started because an earlier step failed.
    - name: Cleanup
      if: always()
      run: |
        docker stop tika-server-${{ matrix.arch }} || true
        docker rm tika-server-${{ matrix.arch }} || true
        docker stop minio || true
        docker rm minio || true
        docker system prune -f
        docker image prune -af
# Publish stage: once both test jobs have passed, build the image for each
# matrix platform on its listed runner and push it to ghcr.io under an
# architecture-suffixed tag.
build-data-processing:
  name: Build data processing container image
  needs:
    - test-multi-arch
    - test-python-unit
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 90
  strategy:
    fail-fast: false
    matrix:
      include:
        - arch: amd64
          platform: linux/amd64
          runner: ubuntu-latest
        - arch: arm64
          platform: linux/arm64
          runner: ubuntu-24.04-arm
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: image=moby/buildkit:v0.12.5

    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.repository_owner }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Branch refs publish the moving `latest-<arch>` tag.
    - name: Build and push development container image
      if: startsWith(github.ref, 'refs/heads/')
      uses: docker/build-push-action@v5
      with:
        push: true
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        tags: ghcr.io/${{ github.repository }}:latest-${{ matrix.arch }}
        cache-from: type=gha,scope=main-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=main-${{ matrix.arch }}

    # Tag refs publish `<tag>-<arch>`, using a separate cache scope.
    - name: Build and push tagged container image
      if: startsWith(github.ref, 'refs/tags/')
      uses: docker/build-push-action@v5
      with:
        push: true
        context: .
        file: ./Dockerfile
        platforms: ${{ matrix.platform }}
        tags: ghcr.io/${{ github.repository }}:${{ github.ref_name }}-${{ matrix.arch }}
        cache-from: type=gha,scope=tag-${{ matrix.arch }}
        cache-to: type=gha,mode=max,scope=tag-${{ matrix.arch }}
# Manifest stage: combine the per-architecture images pushed by
# build-data-processing into a single multi-arch tag.
#
# Context values (repository, ref name) are handed to the scripts through
# `env` and expanded by the shell as quoted variables, instead of being
# templated into the script text, so an unusual tag name cannot inject shell
# commands.
create-data-processing-manifest:
  name: Create data processing multi-arch manifest
  runs-on: ubuntu-latest
  needs: build-data-processing
  timeout-minutes: 15
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.repository_owner }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Verify single-arch images availability (branch)
      if: ${{ startsWith(github.ref, 'refs/heads/') }}
      env:
        REPO: ${{ github.repository }}
      run: |
        for tag in latest-amd64 latest-arm64; do
          found=false
          # Up to 20 attempts, 3 seconds apart, per image.
          for i in {1..20}; do
            if docker buildx imagetools inspect "ghcr.io/$REPO:$tag" > /dev/null 2>&1; then
              echo "Found ghcr.io/$REPO:$tag"
              found=true
              break
            fi
            echo "Waiting for ghcr.io/$REPO:$tag to be available ($i/20)..."
            sleep 3
          done
          # Fail here with a clear message rather than in the manifest step.
          if [ "$found" != "true" ]; then
            echo "::error::ghcr.io/$REPO:$tag did not become available"
            exit 1
          fi
        done
    - name: Create and push development manifest
      if: ${{ startsWith(github.ref, 'refs/heads/') }}
      env:
        REPO: ${{ github.repository }}
      run: |
        docker buildx imagetools create \
          -t "ghcr.io/$REPO:latest" \
          "ghcr.io/$REPO:latest-amd64" \
          "ghcr.io/$REPO:latest-arm64"
    - name: Verify single-arch images availability (tag)
      if: ${{ startsWith(github.ref, 'refs/tags/') }}
      env:
        REPO: ${{ github.repository }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        for arch in amd64 arm64; do
          image="ghcr.io/$REPO:$REF_NAME-$arch"
          found=false
          # Up to 20 attempts, 3 seconds apart, per image.
          for i in {1..20}; do
            if docker buildx imagetools inspect "$image" > /dev/null 2>&1; then
              echo "Found $image"
              found=true
              break
            fi
            echo "Waiting for $image to be available ($i/20)..."
            sleep 3
          done
          # Fail here with a clear message rather than in the manifest step.
          if [ "$found" != "true" ]; then
            echo "::error::$image did not become available"
            exit 1
          fi
        done
    - name: Create and push tagged manifest
      if: ${{ startsWith(github.ref, 'refs/tags/') }}
      env:
        REPO: ${{ github.repository }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        docker buildx imagetools create \
          -t "ghcr.io/$REPO:$REF_NAME" \
          "ghcr.io/$REPO:$REF_NAME-amd64" \
          "ghcr.io/$REPO:$REF_NAME-arm64"