77 workflow_dispatch :
88 inputs :
99 clear_cache :
10- description : " Skip restoring the DerivedData cache (force a cold build)"
10+ # Re-runs of a failed job already force a cold build automatically
11+ # (see "Wipe restored caches" step). This input is for the rare case
12+ # where you want to start a brand-new run with a cold build — e.g.
13+ # after a CACHE_SALT bump on `main` to verify the cold path before
14+ # PR runs hit it.
15+ description : " Force a cold build on the FIRST attempt (re-runs are already cold)"
1116 type : boolean
1217 default : false
1318
2833
2934jobs :
3035 test-core :
31- runs-on : macos-latest
32- # 30 min, not 15. Two-bucket budget:
33- # ~13–14 min — full cold build (mlx-swift `Cmlx` C++ + OsaurusCore +
36+ # Pinned (was `macos-latest`) so a runner image bump can't quietly
37+ # change the build environment under us.
38+ runs-on : macos-26
39+ # 45 min, not 30. Three-bucket budget:
40+ # ~25–30 min — full cold build (mlx-swift `Cmlx` C++ + SQLCipher
41+ # `sqlite3.c` ~250k LoC of C compiled with
42+ # `-DSQLITE_HAS_CODEC=1` and friends + OsaurusCore +
3443 # OsaurusCoreTests Swift) when both SPM and DerivedData
35- # caches miss. Empirically that's the budget on a
36- # `macos-latest` runner; PR #881 (run 24573707695) hit
37- # exactly the 15-min wall mid-Swift-compile because no
38- # DerivedData cache existed (main hadn't successfully
39- # saved one since PR #878).
44+ # caches miss. PR #951 (run 24937664669, attempt 2)
45+ # hit the prior 30-min wall mid-Swift-compile after
46+ # 27:27 in the xcodebuild step — that's the empirical
47+ # floor on `macos-26` with the SQLCipher amalgamation.
4048 # ~ 2– 3 min — actual `xcodebuild test` once the build is warm.
41- # ~ 5–10 min — buffer / future growth, runner variance.
49+ # ~10–15 min — buffer / future growth, runner variance.
4250 #
4351 # Once any successful run lands on `main`, the `Save DerivedData cache`
4452 # step at the bottom populates the cache and subsequent runs return to
45- # ~5 min total. The 30 -min ceiling is an "even a worst-case cold build
53+ # ~5 min total. The 45-min ceiling is an "even a worst-case cold build
4654 # finishes" guard, NOT an expected duration. If you find yourself
4755 # raising it again, the right fix is to split this into a separate
4856 # build-cache-warm job that runs nightly on `main`, not to bump the
4957 # ceiling indefinitely.
50- timeout-minutes : 30
58+ timeout-minutes : 45
5159 env :
5260 WORKSPACE : osaurus.xcworkspace
5361 SPM_CACHE : .spm-cache
@@ -75,14 +83,53 @@ jobs:
7583
7684 - name : Restore DerivedData cache
7785 id : dd-cache
78- if : ${{ github.event_name != 'workflow_dispatch' || !inputs.clear_cache }}
86+ # Always restore so `cache-primary-key` is populated for the save
87+ # step at the bottom (the wipe step below handles forced cold
88+ # builds without preventing main from repopulating the cache).
7989 uses : actions/cache/restore@v5
8090 with :
8191 path : ~/Library/Developer/Xcode/DerivedData
82- key : dd-${{ runner.os }}-${{ env.CACHE_SALT }}-xcode${{ env.XCODE_VERSION }}-${{ hashFiles('osaurus.xcworkspace/xcshareddata/swiftpm/Package.resolved', 'Packages/**/*.swift', 'Packages/**/Package.swift', 'Packages/**/Resources/**') }}
92+ # Include vendored C sources (currently the SQLCipher amalgamation
93+ # under Packages/OsaurusCore/SQLCipher/). Without this, an
94+ # SQLCipher bump would land its new sqlite3.{c,h} but CI would
95+ # silently re-use a stale cached compile of the old code.
96+ key : dd-${{ runner.os }}-${{ env.CACHE_SALT }}-xcode${{ env.XCODE_VERSION }}-${{ hashFiles('osaurus.xcworkspace/xcshareddata/swiftpm/Package.resolved', 'Packages/**/*.swift', 'Packages/**/Package.swift', 'Packages/**/Resources/**', 'Packages/**/*.c', 'Packages/**/*.h') }}
8397 restore-keys : |
8498 dd-${{ runner.os }}-${{ env.CACHE_SALT }}-xcode${{ env.XCODE_VERSION }}-
8599
100+ # Make "clear the build cache" a one-click operation. Two triggers:
101+ # 1. `github.run_attempt != '1'` — i.e. a re-run. The default
102+ # "Re-run failed jobs" button is the natural place for someone
103+ # who just saw a build failure to land, so we make that the
104+ # intuitive escape hatch for cache poison: the first attempt
105+ # uses the cache (fast); any re-run forces a cold compile.
106+ # 2. `workflow_dispatch.clear_cache=true` — manual force-cold on
107+ # a fresh run (e.g. validating a CACHE_SALT bump before PRs
108+ # start hitting it).
109+ #
110+ # We wipe ONLY DerivedData, not the SPM cache. DerivedData holds
111+ # compiled object files / .swiftmodule / linked binaries — the
112+ # actual build outputs that can carry over a stale-source bug across
113+ # incremental builds. The SPM cache is just downloaded source code
114+ # pinned by `Package.resolved` checksums; it can't be "poisoned" in
115+ # any way that affects build correctness, and re-downloading it on
116+ # every re-run cost ~2 min in PR #951 run 24937664669 — wasted
117+ # budget that contributed to the 30-min cold-build cancellation.
118+ #
119+ # We wipe AFTER the restore step (rather than skipping the restore)
120+ # so `steps.dd-cache.outputs.cache-primary-key` stays populated and
121+ # the `Save DerivedData cache` step at the bottom can still
122+ # repopulate the cache on a successful `main` run.
123+ - name : Wipe restored DerivedData (re-run or workflow_dispatch clear_cache)
124+ if : ${{ github.run_attempt != '1' || (github.event_name == 'workflow_dispatch' && inputs.clear_cache) }}
125+ run : |
126+ REASON="run_attempt=${{ github.run_attempt }}"
127+ if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ inputs.clear_cache }}" = "true" ]; then
128+ REASON="$REASON, workflow_dispatch clear_cache=true"
129+ fi
130+ echo "::notice title=Cold build forced::Wiping restored DerivedData before build ($REASON). SPM cache preserved (it's source-only and pinned by Package.resolved). To re-run with the warm cache instead, push a new commit or trigger a fresh run."
131+ rm -rf "$HOME/Library/Developer/Xcode/DerivedData"
132+
86133 - name : Resolve dependencies
87134 run : >-
88135 xcodebuild -resolvePackageDependencies
@@ -133,7 +180,13 @@ jobs:
133180 run : echo "::notice title=test-core duration::$SECONDS seconds"
134181
135182 - name : Print failure summary
136- if : failure()
183+ # Also run on `cancelled()` so a job-timeout cancellation (e.g. a
184+ # cold build that ate the 45-min wall) still gets a Mode A diag
185+ # block in the GitHub UI instead of being a silent skip — see
186+ # PR #951 run 24937664669, where attempt 2's 27:27 cold compile
187+ # was killed by the prior 30-min timeout AND `Print failure
188+ # summary` was skipped because cancellation isn't `failure()`.
189+ if : ${{ failure() || cancelled() }}
137190 env :
138191 # Surface the cache outcome inside the summary so the next person
139192 # can immediately tell "cold-cache compile timeout" from "warm
@@ -185,13 +238,17 @@ jobs:
185238 if [ ! -d "$XCRESULT_PATH" ]; then
186239 if [ -z "$XCTEST_BINARY" ]; then
187240 # Mode A.
188- CACHE_NOTE="_(DerivedData cache hit: \`${DD_CACHE_HIT:-unknown}\`)_"
241+ CACHE_NOTE="_(DerivedData cache hit: \`${DD_CACHE_HIT:-unknown}\`, run attempt: \`${{ github.run_attempt }}\`)_"
189242 {
190243 echo "**Mode A — build phase did not complete (no xctest bundle on disk).**"
191244 echo
192- echo "Either a compile/link error fired (scroll the **Test OsaurusCore** log above for the first \`error:\` line), OR the cold build ran past the 30 -min job timeout. ${CACHE_NOTE}"
245+ echo "Either a compile/link error fired (scroll the **Test OsaurusCore** log above for the first \`error:\` line), OR the cold build ran past the 45-min job timeout. ${CACHE_NOTE}"
193246 echo
194247 echo "If \`cache-hit: false\` AND no \`error:\` lines appear in the raw log, this is the cold-cache-timeout flavor. The fix is to land one successful run on \`main\` so the \`Save DerivedData cache\` step at the bottom of this job populates the cache; subsequent PR runs warm-start from it and finish in ~5 min. Re-running this same job will NOT warm-start it: re-runs wipe the restored DerivedData and always build cold, and the cache is only saved by successful runs on \`main\`."
248+ echo
249+ echo "**\`run_attempt > 1\` AND \`cache-hit: false\`?** That's the deliberate cold-rebuild path triggered by **Re-run failed jobs** — see the \`Wipe restored DerivedData\` step in this job. If the cold build is exhausting the 45-min budget on every re-run, the codebase has outgrown the budget; bump \`timeout-minutes\` and update its comment block, OR move warm-cache priming to a nightly \`main\` job so PRs always warm-start."
250+ echo
251+ echo "**Suspect cache poisoning on a fresh attempt?** Click **Re-run failed jobs** — re-runs automatically wipe DerivedData (the SPM cache is preserved because it's pinned by \`Package.resolved\` and can't be poisoned)."
195252 } >> "$GITHUB_STEP_SUMMARY"
196253 else
197254 # Mode B.
@@ -244,7 +301,10 @@ jobs:
244301 ' >> "$GITHUB_STEP_SUMMARY" || true
245302
246303 - name : Upload xcresult on failure
247- if : failure()
304+ # Same `failure() || cancelled()` rationale as the failure-summary
305+ # step above: on a wall-timeout the xcresult bundle may be
306+ # partially populated and is still useful for postmortem.
307+ if : ${{ failure() || cancelled() }}
248308 uses : actions/upload-artifact@v5
249309 with :
250310 name : test-core-xcresult-${{ github.run_attempt }}
@@ -253,9 +313,10 @@ jobs:
253313 if-no-files-found : warn
254314
255315 # Save the cache only on `main` so a half-baked PR can never poison it.
256- # If `clear_cache=true` was passed via workflow_dispatch we also skip the
257- # save (the restore step didn't run, so `cache-primary-key` is empty);
258- # the next normal `main` push will repopulate the cache.
316+ # `actions/cache/save@v5` is a no-op when the key already exists, so a
317+ # forced cold re-run on `main` (which wipes DerivedData and rebuilds
318+ # from scratch) won't overwrite a known-good cache entry under the
319+ # same key. To intentionally invalidate every cache, bump CACHE_SALT.
259320 - name : Save DerivedData cache
260321 if : ${{ github.ref == 'refs/heads/main' && success() && steps.dd-cache.outputs.cache-primary-key != '' }}
261322 uses : actions/cache/save@v5
@@ -264,7 +325,8 @@ jobs:
264325 key : ${{ steps.dd-cache.outputs.cache-primary-key }}
265326
266327 test-cli :
267- runs-on : macos-latest
328+ # Pinned (was `macos-latest`).
329+ runs-on : macos-26
268330 timeout-minutes : 10
269331 steps :
270332 - name : Checkout code
@@ -290,7 +352,8 @@ jobs:
290352 run : echo "::notice title=test-cli duration::$SECONDS seconds"
291353
292354 swiftlint :
293- runs-on : macos-latest
355+ # Pinned (was `macos-latest`).
356+ runs-on : macos-26
294357 timeout-minutes : 10
295358 steps :
296359 - name : Checkout code
0 commit comments