# Fix CI: Increase OpenSearch health check timeout and retries #64
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow-level settings: display name, triggers, token permissions and
# run concurrency.
name: Build and Test container images

# Runs on pushes to main, on pushes of tags matching v*, and on manual dispatch.
on:
  push:
    branches:
      - main
    tags:
      - "v*"
  workflow_dispatch:

# Token scopes applied to every job in this workflow.
permissions:
  contents: read
  packages: write

# One run per workflow and ref: a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Smoke test for both images on an amd64 and an arm64 runner: build each
  # image, import the main Python dependencies inside the data-processing
  # image, and check that the Tika server answers on /version.
  test-multi-arch:
    name: Test on multiple architectures
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: linux/amd64
            runner: ubuntu-latest
            arch: amd64
          - platform: linux/arm64
            runner: ubuntu-24.04-arm
            arch: arm64
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 45
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Free up disk space
        run: |
          # Remove unnecessary packages and clean up
          sudo apt-get autoremove -y
          sudo apt-get autoclean
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker system prune -af
          df -h

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver-opts: |
            image=moby/buildkit:v0.12.5

      - name: Monitor disk usage before build
        run: |
          echo "=== Disk usage before build ==="
          df -h
          echo "=== Docker system info ==="
          docker system df

      - name: Build test image for ${{ matrix.platform }}
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          platforms: ${{ matrix.platform }}
          # Always load the result into the local Docker daemon: the
          # `docker run` steps below need the image on every runner, not
          # only on amd64.
          load: true
          cache-from: type=gha,scope=test-${{ matrix.platform }}
          cache-to: type=gha,mode=min,scope=test-${{ matrix.platform }}
          tags: test-data-processing:${{ matrix.arch }}

      - name: Monitor disk usage after build
        run: |
          echo "=== Disk usage after build ==="
          df -h
          echo "=== Docker system info ==="
          docker system df

      - name: Build Apache Tika test image for ${{ matrix.platform }}
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile_apache_tika
          platforms: ${{ matrix.platform }}
          # Loaded on every runner for the same reason as the image above.
          load: true
          cache-from: type=gha,scope=tika-test-${{ matrix.platform }}
          cache-to: type=gha,mode=min,scope=tika-test-${{ matrix.platform }}
          tags: test-apache-tika:${{ matrix.arch }}

      # Run functional tests on both architectures with native execution
      - name: Test Python dependencies
        run: |
          docker run --rm test-data-processing:${{ matrix.arch }} python -c "
          import sentence_transformers
          import psycopg2
          import opensearchpy
          import boto3
          import sklearn
          import numpy
          print('✅ All major dependencies imported successfully on ${{ matrix.platform }}')
          print('✅ sentence-transformers version:', sentence_transformers.__version__)
          print('✅ psycopg2 version:', psycopg2.__version__)
          print('✅ opensearchpy version:', opensearchpy.__version__)
          print('✅ boto3 version:', boto3.__version__)
          print('✅ scikit-learn version:', sklearn.__version__)
          print('✅ numpy version:', numpy.__version__)
          "

      - name: Test Apache Tika server
        run: |
          # Start Tika server
          CONTAINER_ID=$(docker run -d -p 9998:9998 test-apache-tika:${{ matrix.arch }})
          echo "Waiting for Tika server to start on ${{ matrix.platform }}..."
          sleep 15
          # Test if Tika is responding (with retry): 15 s initial wait plus
          # up to 6 polls 5 s apart, i.e. 45 s in total.
          TIKA_READY=false
          for i in {1..6}; do
            if curl -f http://localhost:9998/version > /dev/null 2>&1; then
              TIKA_READY=true
              break
            fi
            echo "Attempt $i/6: Tika not ready yet, waiting 5 more seconds..."
            sleep 5
          done
          if [ "$TIKA_READY" = true ]; then
            echo "✅ Apache Tika server is responding on ${{ matrix.platform }}"
            TIKA_VERSION=$(curl -s http://localhost:9998/version)
            echo "✅ Tika version: $TIKA_VERSION"
          else
            echo "❌ Apache Tika server is not responding after 45 seconds on ${{ matrix.platform }}"
            echo "Container logs:"
            docker logs "$CONTAINER_ID"
            docker stop "$CONTAINER_ID"
            exit 1
          fi
          # Cleanup
          docker stop "$CONTAINER_ID"

      # Runs even when an earlier step failed.
      - name: Clean up test images
        if: always()
        run: |
          docker system prune -f
          docker image prune -af
| test-python-unit: | |
| name: Run Python unit tests | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - platform: linux/amd64 | |
| runner: ubuntu-latest | |
| arch: amd64 | |
| - platform: linux/arm64 | |
| runner: ubuntu-24.04-arm | |
| arch: arm64 | |
| runs-on: ${{ matrix.runner }} | |
| timeout-minutes: 60 | |
| services: | |
| postgres: | |
| image: postgres:11 | |
| env: | |
| POSTGRES_PASSWORD: queridodiario | |
| POSTGRES_USER: queridodiario | |
| POSTGRES_DB: queridodiariodb | |
| options: >- | |
| --health-cmd pg_isready | |
| --health-interval 10s | |
| --health-timeout 5s | |
| --health-retries 5 | |
| ports: | |
| - 5432:5432 | |
| opensearch: | |
| image: opensearchproject/opensearch:2.9.0 | |
| env: | |
| discovery.type: single-node | |
| plugins.security.ssl.http.enabled: false | |
| options: >- | |
| --health-cmd "curl -f http://localhost:9200/_cluster/health" | |
| --health-interval 10s | |
| --health-timeout 5s | |
| --health-retries 5 | |
| ports: | |
| - 9200:9200 | |
| minio: | |
| image: quay.io/minio/minio:RELEASE.2025-09-07T16-13-09Z | |
| env: | |
| MINIO_ACCESS_KEY: minio-access-key | |
| MINIO_SECRET_KEY: minio-secret-key | |
| MINIO_DEFAULT_BUCKETS: queridodiariobucket:public | |
| ports: | |
| - 9000:9000 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Free up disk space | |
| run: | | |
| # Remove unnecessary packages and clean up | |
| sudo apt-get autoremove -y | |
| sudo apt-get autoclean | |
| sudo rm -rf /usr/share/dotnet | |
| sudo rm -rf /usr/local/lib/android | |
| sudo rm -rf /opt/ghc | |
| sudo rm -rf /opt/hostedtoolcache/CodeQL | |
| sudo docker system prune -af | |
| df -h | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| with: | |
| driver-opts: | | |
| image=moby/buildkit:v0.12.5 | |
| - name: Build test image | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: ./Dockerfile | |
| platforms: ${{ matrix.platform }} | |
| load: true | |
| cache-from: type=gha,scope=unittest-${{ matrix.arch }} | |
| cache-to: type=gha,mode=max,scope=unittest-${{ matrix.arch }} | |
| tags: test-data-processing:unittest-${{ matrix.arch }} | |
| - name: Build Apache Tika test image | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: ./Dockerfile_apache_tika | |
| platforms: ${{ matrix.platform }} | |
| load: true | |
| cache-from: type=gha,scope=tika-unittest-${{ matrix.arch }} | |
| cache-to: type=gha,mode=max,scope=tika-unittest-${{ matrix.arch }} | |
| tags: test-apache-tika:unittest-${{ matrix.arch }} | |
| - name: Start Apache Tika server | |
| run: | | |
| docker run -d -p 9998:9998 --name tika-server-${{ matrix.arch }} test-apache-tika:unittest-${{ matrix.arch }} | |
| sleep 15 | |
| # Verify Tika is running | |
| for i in {1..6}; do | |
| if curl -f http://localhost:9998/version > /dev/null 2>&1; then | |
| echo "✅ Apache Tika server is ready on ${{ matrix.platform }}" | |
| break | |
| fi | |
| echo "Waiting for Tika server on ${{ matrix.platform }}... ($i/6)" | |
| sleep 5 | |
| done | |
| - name: Run Python unit tests | |
| run: | | |
| docker run --rm \ | |
| --network host \ | |
| -e PYTHONPATH=/mnt/code \ | |
| -e POSTGRES_PASSWORD=queridodiario \ | |
| -e POSTGRES_USER=queridodiario \ | |
| -e POSTGRES_DB=queridodiariodb \ | |
| -e POSTGRES_HOST=localhost \ | |
| -e POSTGRES_PORT=5432 \ | |
| test-data-processing:unittest-${{ matrix.arch }} \ | |
| python -m unittest discover -s tests -p "*.py" -v | |
| - name: Run coverage report | |
| run: | | |
| docker run --rm \ | |
| --network host \ | |
| -e PYTHONPATH=/mnt/code \ | |
| -e POSTGRES_PASSWORD=queridodiario \ | |
| -e POSTGRES_USER=queridodiario \ | |
| -e POSTGRES_DB=queridodiariodb \ | |
| -e POSTGRES_HOST=localhost \ | |
| -e POSTGRES_PORT=5432 \ | |
| test-data-processing:unittest-${{ matrix.arch }} \ | |
| bash -c "coverage run -m unittest discover -s tests -p '*.py' && coverage report -m" | |
| - name: Cleanup | |
| if: always() | |
| run: | | |
| docker stop tika-server-${{ matrix.arch }} || true | |
| docker rm tika-server-${{ matrix.arch }} || true | |
| docker system prune -f | |
| docker image prune -af | |
| build-data-processing: | |
| name: Build data processing container image | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - platform: linux/amd64 | |
| runner: ubuntu-latest | |
| arch: amd64 | |
| - platform: linux/arm64 | |
| runner: ubuntu-24.04-arm | |
| arch: arm64 | |
| runs-on: ${{ matrix.runner }} | |
| needs: [test-multi-arch, test-python-unit] | |
| timeout-minutes: 90 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Free up disk space | |
| run: | | |
| # Remove unnecessary packages and clean up | |
| sudo apt-get autoremove -y | |
| sudo apt-get autoclean | |
| sudo rm -rf /usr/share/dotnet | |
| sudo rm -rf /usr/local/lib/android | |
| sudo rm -rf /opt/ghc | |
| sudo rm -rf /opt/hostedtoolcache/CodeQL | |
| sudo docker system prune -af | |
| df -h | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| with: | |
| driver-opts: | | |
| image=moby/buildkit:v0.12.5 | |
| - name: Login to GitHub Container Registry | |
| uses: docker/login-action@v3 | |
| with: | |
| registry: ghcr.io | |
| username: ${{ github.repository_owner }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Build and push development container image | |
| if: ${{ startsWith(github.ref, 'refs/heads/') }} | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: ./Dockerfile | |
| platforms: ${{ matrix.platform }} | |
| push: true | |
| cache-from: type=gha,scope=main-${{ matrix.arch }} | |
| cache-to: type=gha,mode=max,scope=main-${{ matrix.arch }} | |
| tags: | | |
| ghcr.io/${{ github.repository }}:latest-${{ matrix.arch }} | |
| - name: Build and push tagged container image | |
| if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: ./Dockerfile | |
| platforms: ${{ matrix.platform }} | |
| push: true | |
| cache-from: type=gha,scope=tag-${{ matrix.arch }} | |
| cache-to: type=gha,mode=max,scope=tag-${{ matrix.arch }} | |
| tags: | | |
| ghcr.io/${{ github.repository }}:${{ github.ref_name }}-${{ matrix.arch }} | |
| build-apache-tika: | |
| name: Build Apache Tika container image | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - platform: linux/amd64 | |
| runner: ubuntu-latest | |
| arch: amd64 | |
| - platform: linux/arm64 | |
| runner: ubuntu-24.04-arm | |
| arch: arm64 | |
| runs-on: ${{ matrix.runner }} | |
| needs: [test-multi-arch, test-python-unit] | |
| timeout-minutes: 60 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Free up disk space | |
| run: | | |
| # Remove unnecessary packages and clean up | |
| sudo apt-get autoremove -y | |
| sudo apt-get autoclean | |
| sudo rm -rf /usr/share/dotnet | |
| sudo rm -rf /usr/local/lib/android | |
| sudo rm -rf /opt/ghc | |
| sudo rm -rf /opt/hostedtoolcache/CodeQL | |
| sudo docker system prune -af | |
| df -h | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| with: | |
| driver-opts: | | |
| image=moby/buildkit:v0.12.5 | |
| - name: Login to GitHub Container Registry | |
| uses: docker/login-action@v3 | |
| with: | |
| registry: ghcr.io | |
| username: ${{ github.repository_owner }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Build and push Apache Tika development container image | |
| if: ${{ startsWith(github.ref, 'refs/heads/') }} | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: ./Dockerfile_apache_tika | |
| platforms: ${{ matrix.platform }} | |
| push: true | |
| cache-from: type=gha,scope=tika-main-${{ matrix.arch }} | |
| cache-to: type=gha,mode=max,scope=tika-main-${{ matrix.arch }} | |
| tags: | | |
| ghcr.io/okfn-brasil/querido-diario-apache-tika-server:latest-${{ matrix.arch }} | |
| - name: Build and push Apache Tika tagged container image | |
| if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: ./Dockerfile_apache_tika | |
| platforms: ${{ matrix.platform }} | |
| push: true | |
| cache-from: type=gha,scope=tika-tag-${{ matrix.arch }} | |
| cache-to: type=gha,mode=max,scope=tika-tag-${{ matrix.arch }} | |
| tags: | | |
| ghcr.io/okfn-brasil/querido-diario-apache-tika-server:${{ github.ref_name }}-${{ matrix.arch }} | |
| create-data-processing-manifest: | |
| name: Create data processing multi-arch manifest | |
| runs-on: ubuntu-latest | |
| needs: build-data-processing | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| - name: Login to GitHub Container Registry | |
| uses: docker/login-action@v3 | |
| with: | |
| registry: ghcr.io | |
| username: ${{ github.repository_owner }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Verify single-arch images availability (branch) | |
| if: ${{ startsWith(github.ref, 'refs/heads/') }} | |
| run: | | |
| REPO="${{ github.repository }}" | |
| for tag in latest-amd64 latest-arm64; do | |
| for i in {1..20}; do | |
| if docker buildx imagetools inspect ghcr.io/$REPO:$tag > /dev/null 2>&1; then | |
| echo "Found ghcr.io/$REPO:$tag"; | |
| break; | |
| fi | |
| echo "Waiting for ghcr.io/$REPO:$tag to be available ($i/20)..."; | |
| sleep 3; | |
| done | |
| done | |
| - name: Create and push development manifest | |
| if: ${{ startsWith(github.ref, 'refs/heads/') }} | |
| run: | | |
| REPO="${{ github.repository }}" | |
| docker buildx imagetools create \ | |
| -t ghcr.io/$REPO:latest \ | |
| ghcr.io/$REPO:latest-amd64 \ | |
| ghcr.io/$REPO:latest-arm64 | |
| - name: Verify single-arch images availability (tag) | |
| if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
| run: | | |
| REPO="${{ github.repository }}" | |
| for arch in amd64 arm64; do | |
| for i in {1..20}; do | |
| if docker buildx imagetools inspect ghcr.io/$REPO:${{ github.ref_name }}-$arch > /dev/null 2>&1; then | |
| echo "Found ghcr.io/$REPO:${{ github.ref_name }}-$arch"; | |
| break; | |
| fi | |
| echo "Waiting for ghcr.io/$REPO:${{ github.ref_name }}-$arch to be available ($i/20)..."; | |
| sleep 3; | |
| done | |
| done | |
| - name: Create and push tagged manifest | |
| if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
| run: | | |
| REPO="${{ github.repository }}" | |
| docker buildx imagetools create \ | |
| -t ghcr.io/$REPO:${{ github.ref_name }} \ | |
| ghcr.io/$REPO:${{ github.ref_name }}-amd64 \ | |
| ghcr.io/$REPO:${{ github.ref_name }}-arm64 | |
| create-apache-tika-manifest: | |
| name: Create Apache Tika multi-arch manifest | |
| runs-on: ubuntu-latest | |
| needs: build-apache-tika | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| - name: Login to GitHub Container Registry | |
| uses: docker/login-action@v3 | |
| with: | |
| registry: ghcr.io | |
| username: ${{ github.repository_owner }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Verify single-arch images availability (branch) | |
| if: ${{ startsWith(github.ref, 'refs/heads/') }} | |
| run: | | |
| for tag in latest-amd64 latest-arm64; do | |
| for i in {1..20}; do | |
| if docker buildx imagetools inspect ghcr.io/okfn-brasil/querido-diario-apache-tika-server:$tag > /dev/null 2>&1; then | |
| echo "Found ghcr.io/okfn-brasil/querido-diario-apache-tika-server:$tag"; | |
| break; | |
| fi | |
| echo "Waiting for ghcr.io/okfn-brasil/querido-diario-apache-tika-server:$tag to be available ($i/20)..."; | |
| sleep 3; | |
| done | |
| done | |
| - name: Create and push Apache Tika development manifest | |
| if: ${{ startsWith(github.ref, 'refs/heads/') }} | |
| run: | | |
| docker buildx imagetools create \ | |
| -t ghcr.io/okfn-brasil/querido-diario-apache-tika-server:latest \ | |
| ghcr.io/okfn-brasil/querido-diario-apache-tika-server:latest-amd64 \ | |
| ghcr.io/okfn-brasil/querido-diario-apache-tika-server:latest-arm64 | |
| - name: Verify single-arch images availability (tag) | |
| if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
| run: | | |
| for arch in amd64 arm64; do | |
| for i in {1..20}; do | |
| if docker buildx imagetools inspect ghcr.io/okfn-brasil/querido-diario-apache-tika-server:${{ github.ref_name }}-$arch > /dev/null 2>&1; then | |
| echo "Found ghcr.io/okfn-brasil/querido-diario-apache-tika-server:${{ github.ref_name }}-$arch"; | |
| break; | |
| fi | |
| echo "Waiting for ghcr.io/okfn-brasil/querido-diario-apache-tika-server:${{ github.ref_name }}-$arch to be available ($i/20)..."; | |
| sleep 3; | |
| done | |
| done | |
| - name: Create and push Apache Tika tagged manifest | |
| if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
| run: | | |
| docker buildx imagetools create \ | |
| -t ghcr.io/okfn-brasil/querido-diario-apache-tika-server:${{ github.ref_name }} \ | |
| ghcr.io/okfn-brasil/querido-diario-apache-tika-server:${{ github.ref_name }}-amd64 \ | |
| ghcr.io/okfn-brasil/querido-diario-apache-tika-server:${{ github.ref_name }}-arm64 |