From 3ba6be0a95fead97e65a3422fe40af65ddd92e92 Mon Sep 17 00:00:00 2001 From: silverwind Date: Tue, 9 Jun 2026 14:03:34 +0200 Subject: [PATCH 1/4] ci: bound seeded Go cache size and speed up disk cleanup The cache-seeder saved its caches with a restore-keys prefix fallback, so every go.sum change restored the previous cache and re-saved the union. Old module versions and stale build objects accumulated, growing the cache from ~3GB to ~7GB and exhausting runner disk (No space left on device). Drop restore-keys from the seeder save branches so each go.sum seeds a clean, bounded cache; PR runs keep restore-keys for warm-start fallback. Also delete the unused preinstalled toolchains in parallel and log free space before and after, to halve the cleanup time and make headroom visible. Refs: https://github.com/go-gitea/gitea/issues/37974 Assisted-by: Claude:Opus-4.8 --- .github/actions/free-disk-space/action.yml | 13 +++++++++++-- .github/actions/go-cache/action.yml | 5 ++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/actions/free-disk-space/action.yml b/.github/actions/free-disk-space/action.yml index 510b643a33427..03cc7f90c2701 100644 --- a/.github/actions/free-disk-space/action.yml +++ b/.github/actions/free-disk-space/action.yml @@ -1,9 +1,18 @@ name: free-disk-space description: Free space on / before large cache restores -# Delete preinstalled toolchains which gitea doesn't use +# Delete unused preinstalled toolchains in parallel (independent trees). The df +# calls bracket the cleanup to log real free space. runs: using: composite steps: - shell: bash - run: sudo rm -rf /usr/local/lib/android /usr/local/.ghcup /opt/ghc /usr/share/dotnet + run: | + echo "free space before cleanup:" + df -h / + for dir in /usr/local/lib/android /usr/local/.ghcup /opt/ghc /usr/share/dotnet; do + sudo rm -rf "$dir" & + done + wait + echo "free space after cleanup:" + df -h / diff --git a/.github/actions/go-cache/action.yml b/.github/actions/go-cache/action.yml index 7096fa3952c50..5abf4e319a661 100644 --- a/.github/actions/go-cache/action.yml +++ b/.github/actions/go-cache/action.yml @@ -4,6 +4,8 @@ description: Restore the go module, build, and golangci-lint caches. Save only o # Only the cache-seeder workflow saves; rename requires updating cache-seeder.yml. # The lint job restores but does not save the gobuild cache, so only one writer # (the gobuild job) populates it and there is no contention on the cache key. +# Seeder restores by exact key only (no restore-keys) so each go.sum seeds a clean +# cache and size stays bounded; do not add restore-keys here. PR runs keep them. inputs: lint-cache: @@ -18,7 +20,6 @@ runs: with: path: ~/go/pkg/mod key: gomod-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('go.sum') }} - restore-keys: gomod-${{ runner.os }}-${{ runner.arch }} - if: ${{ github.workflow != 'cache-seeder' }} uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: @@ -30,7 +31,6 @@ runs: with: path: ~/.cache/go-build key: gobuild-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('go.sum') }} - restore-keys: gobuild-${{ runner.os }}-${{ runner.arch }} - if: ${{ github.workflow != 'cache-seeder' || inputs.lint-cache == 'true' }} uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: @@ -42,7 +42,6 @@ runs: with: path: ~/.cache/golangci-lint key: golint-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('go.sum', '.golangci.yml') }} - restore-keys: golint-${{ runner.os }}-${{ runner.arch }} - if: ${{ inputs.lint-cache == 'true' && github.workflow != 'cache-seeder' }} uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: From 57828aa374b4f1dfce7155f84342b45cb729804c Mon Sep 17 00:00:00 2001 From: silverwind Date: Tue, 9 Jun 2026 14:18:04 +0200 Subject: [PATCH 2/4] ci: test dropping toolchain deletion against db-tests Temporary experiment: df shows ~89G free on / before any cleanup, so disable the toolchain deletion and run the pgsql shards on this action-only change to confirm db-tests still have ample disk headroom. Adds an end-of-job df to capture peak usage. Revert once measured. Refs: https://github.com/go-gitea/gitea/issues/37974 Assisted-by: Claude:Opus-4.8 --- .github/actions/free-disk-space/action.yml | 14 +++----------- .github/workflows/pull-db-tests.yml | 10 ++++++++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/.github/actions/free-disk-space/action.yml b/.github/actions/free-disk-space/action.yml index 03cc7f90c2701..8cb72cb4cd696 100644 --- a/.github/actions/free-disk-space/action.yml +++ b/.github/actions/free-disk-space/action.yml @@ -1,18 +1,10 @@ name: free-disk-space description: Free space on / before large cache restores -# Delete unused preinstalled toolchains in parallel (independent trees). The df -# calls bracket the cleanup to log real free space. +# EXPERIMENT (#37974): toolchain deletion disabled to test whether the ~89G free +# baseline suffices on its own. df logs starting headroom; restore the rm if low. runs: using: composite steps: - shell: bash - run: | - echo "free space before cleanup:" - df -h / - for dir in /usr/local/lib/android /usr/local/.ghcup /opt/ghc /usr/share/dotnet; do - sudo rm -rf "$dir" & - done - wait - echo "free space after cleanup:" - df -h / + run: df -h / diff --git a/.github/workflows/pull-db-tests.yml b/.github/workflows/pull-db-tests.yml index 4cc8d25bbb902..c40df7a24d9f4 100644 --- a/.github/workflows/pull-db-tests.yml +++ b/.github/workflows/pull-db-tests.yml @@ -15,7 +15,7 @@ jobs: uses: ./.github/workflows/files-changed.yml test-pgsql-shard-1: - if: needs.files-changed.outputs.backend == 'true' + if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' needs: files-changed runs-on: ubuntu-latest timeout-minutes: 50 @@ -49,9 +49,11 @@ jobs: shard: 1 total-shards: 2 run-migration: "true" + - if: always() + run: df -h / test-pgsql-shard-2: - if: needs.files-changed.outputs.backend == 'true' + if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' needs: files-changed runs-on: ubuntu-latest timeout-minutes: 50 @@ -84,6 +86,8 @@ jobs: with: shard: 2 total-shards: 2 + - if: always() + run: df -h / test-sqlite: if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' @@ -108,6 +112,8 @@ jobs: GOTEST_FLAGS: -timeout=40m TAGS: bindata gogit GOEXPERIMENT: + - if: always() + run: df -h / test-unit: if: needs.files-changed.outputs.backend == 'true' From fd555f96b907047848bc27a37eee99ed117b90d5 Mon Sep 17 00:00:00 2001 From: silverwind Date: Tue, 9 Jun 2026 14:38:08 +0200 Subject: [PATCH 3/4] ci: re-enable toolchain deletion after disk experiment Disabling the deletion reproduced the #37974 "No space left on device" failure on a disk-starved runner mid cache-restore, while sibling jobs on the common ~89G-free runners passed: the hosted fleet is heterogeneous and the deletion is the headroom that keeps the small-disk minority green. Keep the parallelized deletion and df logging; revert the db-test gate and end-of-job df scaffolding used for the experiment. Refs: https://github.com/go-gitea/gitea/issues/37974 Assisted-by: Claude:Opus-4.8 --- .github/actions/free-disk-space/action.yml | 14 +++++++++++--- .github/workflows/pull-db-tests.yml | 10 ++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/actions/free-disk-space/action.yml b/.github/actions/free-disk-space/action.yml index 8cb72cb4cd696..03cc7f90c2701 100644 --- a/.github/actions/free-disk-space/action.yml +++ b/.github/actions/free-disk-space/action.yml @@ -1,10 +1,18 @@ name: free-disk-space description: Free space on / before large cache restores -# EXPERIMENT (#37974): toolchain deletion disabled to test whether the ~89G free -# baseline suffices on its own. df logs starting headroom; restore the rm if low. +# Delete unused preinstalled toolchains in parallel (independent trees). The df +# calls bracket the cleanup to log real free space. runs: using: composite steps: - shell: bash - run: df -h / + run: | + echo "free space before cleanup:" + df -h / + for dir in /usr/local/lib/android /usr/local/.ghcup /opt/ghc /usr/share/dotnet; do + sudo rm -rf "$dir" & + done + wait + echo "free space after cleanup:" + df -h / diff --git a/.github/workflows/pull-db-tests.yml b/.github/workflows/pull-db-tests.yml index c40df7a24d9f4..4cc8d25bbb902 100644 --- a/.github/workflows/pull-db-tests.yml +++ b/.github/workflows/pull-db-tests.yml @@ -15,7 +15,7 @@ jobs: uses: ./.github/workflows/files-changed.yml test-pgsql-shard-1: - if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' + if: needs.files-changed.outputs.backend == 'true' needs: files-changed runs-on: ubuntu-latest timeout-minutes: 50 @@ -49,11 +49,9 @@ jobs: shard: 1 total-shards: 2 run-migration: "true" - - if: always() - run: df -h / test-pgsql-shard-2: - if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' + if: needs.files-changed.outputs.backend == 'true' needs: files-changed runs-on: ubuntu-latest timeout-minutes: 50 @@ -86,8 +84,6 @@ jobs: with: shard: 2 total-shards: 2 - - if: always() - run: df -h / test-sqlite: if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' @@ -112,8 +108,6 @@ jobs: GOTEST_FLAGS: -timeout=40m TAGS: bindata gogit GOEXPERIMENT: - - if: always() - run: df -h / test-unit: if: needs.files-changed.outputs.backend == 'true' From 6eb61c998870579fb88822efa266f9fe57db4530 Mon Sep 17 00:00:00 2001 From: silverwind Date: Wed, 10 Jun 2026 10:27:56 +0200 Subject: [PATCH 4/4] Apply suggestion from @silverwind Signed-off-by: silverwind --- .github/actions/free-disk-space/action.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/actions/free-disk-space/action.yml b/.github/actions/free-disk-space/action.yml index 03cc7f90c2701..a20f2bd5ae34e 100644 --- a/.github/actions/free-disk-space/action.yml +++ b/.github/actions/free-disk-space/action.yml @@ -1,8 +1,7 @@ name: free-disk-space description: Free space on / before large cache restores -# Delete unused preinstalled toolchains in parallel (independent trees). The df -# calls bracket the cleanup to log real free space. +# Delete preinstalled toolchains which gitea doesn't use and show disk space usage runs: using: composite steps: