Skip to content

Commit 52ff9c5

Browse files
committed
Separate Apache Tika build into dedicated workflow
Created new build_apache_tika.yaml workflow that:
- Runs only when Dockerfile_apache_tika or its workflow changes
- Tests Apache Tika on both amd64 and arm64
- Builds and pushes multi-arch images to GHCR
- Creates multi-arch manifests

Removed from build_container_image.yaml:
- Apache Tika build and test steps from test-multi-arch job
- build-apache-tika job (moved to dedicated workflow)
- create-apache-tika-manifest job (moved to dedicated workflow)

This separation reduces unnecessary builds and improves CI efficiency by only building Apache Tika when its Dockerfile changes.
1 parent 1db31c3 commit 52ff9c5

2 files changed

Lines changed: 236 additions & 175 deletions

File tree

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
---
# Build, smoke-test and publish the Apache Tika server container image.
#
# Pipeline:
#   1. test-apache-tika            - build the image per architecture and check
#                                    that the server answers on /version.
#   2. build-apache-tika           - build and push one single-arch image per
#                                    architecture to GHCR.
#   3. create-apache-tika-manifest - stitch the single-arch images together
#                                    into one multi-arch manifest.
on:
  push:
    branches:
      - main
    # Only pushes to main that touch the Tika Dockerfile or this workflow
    # trigger a run. Tag pushes are matched by the `tags` filter below.
    paths:
      - 'Dockerfile_apache_tika'
      - '.github/workflows/build_apache_tika.yaml'
    tags:
      - "v*"
  workflow_dispatch:

name: Build and Test Apache Tika container image

permissions:
  contents: read
  # Required to push images and manifests to ghcr.io with GITHUB_TOKEN.
  packages: write

# One run per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  # Registry path shared by every push and manifest step below.
  TIKA_IMAGE: ghcr.io/okfn-brasil/querido-diario-apache-tika-server

jobs:
  test-apache-tika:
    name: Test Apache Tika on multiple architectures
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: linux/amd64
            runner: ubuntu-latest
            arch: amd64
          - platform: linux/arm64
            runner: ubuntu-24.04-arm
            arch: arm64
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Free up disk space
        run: |
          sudo apt-get autoremove -y
          sudo apt-get autoclean
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker system prune -af
          df -h

      # NOTE(review): BuildKit is pinned to v0.12.5. Docker's documentation
      # for the `type=gha` cache backend lists a newer minimum BuildKit for
      # the current GitHub cache service API - confirm this pin still works
      # with the cache-from/cache-to settings used in this workflow.
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver-opts: |
            image=moby/buildkit:v0.12.5

      - name: Build Apache Tika test image for ${{ matrix.platform }}
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile_apache_tika
          platforms: ${{ matrix.platform }}
          # Each matrix leg runs on a runner of its own architecture, so the
          # image must always be loaded into the local Docker engine;
          # otherwise the `docker run` in the next step cannot find it.
          load: true
          cache-from: type=gha,scope=tika-test-${{ matrix.arch }}
          cache-to: type=gha,mode=min,scope=tika-test-${{ matrix.arch }}
          tags: |
            test-apache-tika:${{ matrix.arch }}

      - name: Test Apache Tika server
        env:
          TEST_IMAGE: test-apache-tika:${{ matrix.arch }}
          PLATFORM: ${{ matrix.platform }}
        run: |
          # Start Tika server
          CONTAINER_ID=$(docker run -d -p 9998:9998 "$TEST_IMAGE")
          echo "Waiting for Tika server to start on ${PLATFORM}..."
          sleep 15

          # Test if Tika is responding (with retry): 15s initial wait plus
          # up to 6 attempts 5s apart, i.e. 45s in total.
          TIKA_READY=false
          for i in {1..6}; do
            if curl -f http://localhost:9998/version > /dev/null 2>&1; then
              TIKA_READY=true
              break
            fi
            echo "Attempt $i/6: Tika not ready yet, waiting 5 more seconds..."
            sleep 5
          done

          if [ "$TIKA_READY" = true ]; then
            echo "✅ Apache Tika server is responding on ${PLATFORM}"
            TIKA_VERSION=$(curl -s http://localhost:9998/version)
            echo "✅ Tika version: $TIKA_VERSION"
          else
            echo "❌ Apache Tika server is not responding after 45 seconds on ${PLATFORM}"
            echo "Container logs:"
            docker logs "$CONTAINER_ID"
            docker stop "$CONTAINER_ID"
            exit 1
          fi

          # Cleanup
          docker stop "$CONTAINER_ID"

      - name: Clean up test images
        if: always()
        run: |
          docker system prune -f
          docker image prune -af

  build-apache-tika:
    name: Build Apache Tika container image
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: linux/amd64
            runner: ubuntu-latest
            arch: amd64
          - platform: linux/arm64
            runner: ubuntu-24.04-arm
            arch: arm64
    runs-on: ${{ matrix.runner }}
    # Publish only after the smoke test passed on every architecture.
    needs: [test-apache-tika]
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Free up disk space
        run: |
          sudo apt-get autoremove -y
          sudo apt-get autoclean
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker system prune -af
          df -h

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver-opts: |
            image=moby/buildkit:v0.12.5

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Branch runs publish the moving `latest-<arch>` tag.
      - name: Build and push Apache Tika development container image
        if: ${{ startsWith(github.ref, 'refs/heads/') }}
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile_apache_tika
          platforms: ${{ matrix.platform }}
          push: true
          cache-from: type=gha,scope=tika-main-${{ matrix.arch }}
          cache-to: type=gha,mode=max,scope=tika-main-${{ matrix.arch }}
          tags: |
            ${{ env.TIKA_IMAGE }}:latest-${{ matrix.arch }}

      # Tag runs publish `<tag>-<arch>`.
      - name: Build and push Apache Tika tagged container image
        if: ${{ startsWith(github.ref, 'refs/tags/') }}
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile_apache_tika
          platforms: ${{ matrix.platform }}
          push: true
          cache-from: type=gha,scope=tika-tag-${{ matrix.arch }}
          cache-to: type=gha,mode=max,scope=tika-tag-${{ matrix.arch }}
          tags: |
            ${{ env.TIKA_IMAGE }}:${{ github.ref_name }}-${{ matrix.arch }}

  create-apache-tika-manifest:
    name: Create Apache Tika multi-arch manifest
    runs-on: ubuntu-latest
    needs: build-apache-tika
    timeout-minutes: 15
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Poll the registry (20 attempts, 3s apart) until both single-arch
      # images are visible, and fail explicitly if one never shows up.
      - name: Verify single-arch images availability (branch)
        if: ${{ startsWith(github.ref, 'refs/heads/') }}
        run: |
          for tag in latest-amd64 latest-arm64; do
            found=false
            for i in {1..20}; do
              if docker buildx imagetools inspect "${TIKA_IMAGE}:${tag}" > /dev/null 2>&1; then
                echo "Found ${TIKA_IMAGE}:${tag}"
                found=true
                break
              fi
              echo "Waiting for ${TIKA_IMAGE}:${tag} to be available ($i/20)..."
              sleep 3
            done
            if [ "$found" != true ]; then
              echo "::error::${TIKA_IMAGE}:${tag} did not become available"
              exit 1
            fi
          done

      - name: Create and push Apache Tika development manifest
        if: ${{ startsWith(github.ref, 'refs/heads/') }}
        run: |
          docker buildx imagetools create \
            -t "${TIKA_IMAGE}:latest" \
            "${TIKA_IMAGE}:latest-amd64" \
            "${TIKA_IMAGE}:latest-arm64"

      - name: Verify single-arch images availability (tag)
        if: ${{ startsWith(github.ref, 'refs/tags/') }}
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the ref name is never parsed as shell code.
          REF_NAME: ${{ github.ref_name }}
        run: |
          for arch in amd64 arm64; do
            found=false
            for i in {1..20}; do
              if docker buildx imagetools inspect "${TIKA_IMAGE}:${REF_NAME}-${arch}" > /dev/null 2>&1; then
                echo "Found ${TIKA_IMAGE}:${REF_NAME}-${arch}"
                found=true
                break
              fi
              echo "Waiting for ${TIKA_IMAGE}:${REF_NAME}-${arch} to be available ($i/20)..."
              sleep 3
            done
            if [ "$found" != true ]; then
              echo "::error::${TIKA_IMAGE}:${REF_NAME}-${arch} did not become available"
              exit 1
            fi
          done

      - name: Create and push Apache Tika tagged manifest
        if: ${{ startsWith(github.ref, 'refs/tags/') }}
        env:
          REF_NAME: ${{ github.ref_name }}
        run: |
          docker buildx imagetools create \
            -t "${TIKA_IMAGE}:${REF_NAME}" \
            "${TIKA_IMAGE}:${REF_NAME}-amd64" \
            "${TIKA_IMAGE}:${REF_NAME}-arm64"

0 commit comments

Comments
 (0)