diag+bc+gc: restore bc_current_bc across throws + fair safepoint yield #159
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Main CI workflow. Triggers: pushes to `main`, and pull requests | |
| # (no branch or path filter on the pull_request trigger). | |
| name: ci | |
| on: | |
| push: | |
| branches: [main] | |
| pull_request: | |
| jobs: | |
| # Per-OS build + test job; one instance per entry in `matrix.os`. | |
| build: | |
| name: ${{ matrix.os }} | |
| strategy: | |
| # Do not cancel the remaining matrix entries when one fails, so | |
| # a single run reports the state of every platform. | |
| fail-fast: false | |
| matrix: | |
| # Pinned runner labels so a future GHA image bump doesn't | |
| # silently shift the matrix. `ubuntu-24.04-arm` is the GHA | |
| # ARM64 Linux tier (added 2024); together with `ubuntu-24.04` | |
| # we cover both Linux architectures natively. `macos-14` is | |
| # Apple Silicon. `windows-2022` is x86_64 Windows. | |
| os: [ubuntu-24.04, ubuntu-24.04-arm, macos-14, windows-2022] | |
| runs-on: ${{ matrix.os }} | |
| defaults: | |
| run: | |
| # Every `run:` step in this job executes under bash, including | |
| # on the Windows runner, so the scripts are written once. | |
| shell: bash | |
| env: | |
| # `cond && a || b` is the Actions-expression ternary idiom: | |
| # `gcc` on windows-2022, `cc` on every other runner. | |
| CC: ${{ matrix.os == 'windows-2022' && 'gcc' || 'cc' }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Compiler version | |
| run: | | |
| $CC --version | |
| # The Ubuntu 24.04 images (x86_64 and arm) may also carry | |
| # alternate gcc majors. Print the first version line of each | |
| # one that is installed, so a regression tied to a specific | |
| # gcc release (12, 13, ...) is easier to triage. | |
| case "${{ matrix.os }}" in | |
| ubuntu-24.04|ubuntu-24.04-arm) | |
| for major in 12 13 14; do | |
| alt_gcc="gcc-$major" | |
| if command -v "$alt_gcc" >/dev/null; then | |
| "$alt_gcc" --version | head -1 | |
| fi | |
| done | |
| ;; | |
| esac | |
| - name: Bootstrap mino | |
| # Generates the bundled-source headers and compiles ./mino in | |
| # one step. Anything beyond bootstrap belongs in `./mino task`. | |
| # Tee the combined stdout+stderr so a build failure leaves a | |
| # captured log for the two follow-up steps below; the live step | |
| # output stays unchanged for anyone with log access. | |
| # | |
| # The log lives under the runner temp directory rather than | |
| # /tmp: on the Windows runner, bash's /tmp and the location | |
| # upload-artifact resolves for a literal `/tmp/...` path are | |
| # not the same directory, whereas `$RUNNER_TEMP` in the script | |
| # and `runner.temp` in the upload step name one directory on | |
| # every OS. | |
| run: | | |
| set -o pipefail | |
| make 2>&1 | tee "$RUNNER_TEMP/build.log" | |
| - name: Surface build failure | |
| # When an earlier step in this job fails, post the captured | |
| # tail on the job summary page so anyone with Actions UI access | |
| # (incl. signed-in external contributors) sees what broke | |
| # without having to download artifacts. Falls back to a | |
| # placeholder line when no log was written (e.g. the failure | |
| # happened before `make` ran). | |
| if: failure() | |
| run: | | |
| { | |
| echo "## Build failure (${{ matrix.os }})" | |
| echo '' | |
| echo '```' | |
| tail -60 "$RUNNER_TEMP/build.log" 2>/dev/null || echo '(no build log captured)' | |
| echo '```' | |
| } >> "$GITHUB_STEP_SUMMARY" | |
| - name: Upload build log on failure | |
| # Publishes the full log as a run artifact, so off-repo | |
| # observers (and the project bot) can fetch the actual gcc | |
| # error output without needing log-download permission. | |
| # NOTE(review): GitHub's documentation describes artifact | |
| # download as requiring a signed-in account with read access, | |
| # so fully anonymous download may not hold -- confirm. | |
| if: failure() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: build-log-${{ matrix.os }} | |
| path: ${{ runner.temp }}/build.log | |
| retention-days: 7 | |
| - name: Test | |
| # Run the suite runner directly so stdout streams. `task test` | |
| # wraps the subprocess in sh!, which buffers output until exit; | |
| # under a hang, no diagnostic ever surfaces. | |
| # | |
| # MINO_TEST_TRACE=1 prints one stderr line per deftest before | |
| # it runs; combined with capturing stderr into a file and | |
| # tee'ing it to the job log, the last visible trace line | |
| # pinpoints any hanging test. The stderr capture is what makes | |
| # an opaque "Test timed out" actionable. Keeps trace off | |
| # locally (env-gated) so a normal `./mino tests/run.clj` | |
| # produces the same output as before. | |
| # | |
| # Watchdog wrapper: GHA's timeout-minutes sends a SIGKILL after | |
| # the cap, which gives no diagnostic at all on a hang. We | |
| # spawn mino in the background, sleep just inside the cap, | |
| # then SIGABRT it -- mino's crash_handler (main.c:711) prints | |
| # a backtrace + gc stats on SIGABRT, so a hang now leaves a | |
| # readable stack in the log instead of a silent kill. mino | |
| # exits non-zero after the dump, which fails the step | |
| # normally (no continue-on-error masking). | |
| env: | |
| MINO_TEST_TRACE: "1" | |
| run: | | |
| # Turn errexit off: the script must always reach the cleanup | |
| # and the final `exit $RC`, even when `wait` reports a | |
| # non-zero status for mino. | |
| set +e | |
| # Pre-create + tail the trace file so its lines stream to | |
| # the live job log as mino emits them. Without the tail, | |
| # the trace only appears via the failure artifact, which | |
| # makes a live `gh run watch` opaque. | |
| : > /tmp/test_trace.log | |
| (tail -F /tmp/test_trace.log 2>/dev/null) & | |
| TAIL_PID=$! | |
| # exec replaces the subshell with mino so $! is mino's | |
| # pid directly -- the watchdog's kill -ABRT then lands | |
| # on mino, not on an outer shell wrapper. | |
| (exec ./mino tests/run.clj) 2> /tmp/test_trace.log & | |
| MINO_PID=$! | |
| # Wake at 7m30s (the cap is 8m) so SIGABRT has time to | |
| # run mino's handler before GHA's own SIGKILL lands. | |
| # mino's crash_handler (main.c:711) prints a backtrace + | |
| # GC stats on SIGABRT, so a hang now leaves a readable | |
| # stack in the log + trace artifact instead of a silent | |
| # kill. mino exits 134 (128 + SIGABRT) after the dump. | |
| # `kill -0` only probes whether the pid is still alive, so | |
| # the watchdog stays silent when mino already finished. | |
| (sleep 450; if kill -0 $MINO_PID 2>/dev/null; then | |
| echo "##[warning]Watchdog firing SIGABRT on hung mino (pid $MINO_PID)" | |
| kill -ABRT $MINO_PID | |
| fi) & | |
| WD_PID=$! | |
| # Block until mino exits (on its own or via the watchdog) | |
| # and keep its exit status; it becomes the step result. | |
| wait $MINO_PID | |
| RC=$? | |
| # Give the trace tail a moment to flush mino's last lines. | |
| sleep 1 | |
| # Tear down both helpers; `|| true` because either one may | |
| # already be gone. | |
| # NOTE(review): this signals the watchdog subshell only; its | |
| # `sleep 450` child may outlive it -- confirm that a leftover | |
| # sleep cannot hold the step open. | |
| kill $WD_PID 2>/dev/null || true | |
| kill $TAIL_PID 2>/dev/null || true | |
| exit $RC | |
| # Tests usually finish in seconds; a hang means a deadlock, not | |
| # a slow runner. Cap so we get diagnostic output instead of | |
| # waiting on the 6h job-default timeout. The watchdog above | |
| # fires 30s before this cap so we keep the stack trace. | |
| timeout-minutes: 8 | |
| # The Windows test suite has documented divergence: cmd.exe's | |
| # echo emits a trailing space before \n, which the proc-test | |
| # assertions do not strip. Build still must pass; tests are | |
| # informational on Windows until the suite is portable. | |
| continue-on-error: ${{ matrix.os == 'windows-2022' }} | |
| - name: Upload test trace on failure | |
| # The trace captures each deftest entry; the last line shows | |
| # what was running when the timeout fired. Publishing it as a | |
| # run artifact means triage doesn't require log download | |
| # permission. | |
| # NOTE(review): GitHub's documentation describes artifact | |
| # download as requiring a signed-in account with read access, | |
| # so "public" may overstate who can fetch it -- confirm. | |
| # NOTE(review): the Test step is continue-on-error on | |
| # windows-2022, so a Windows test failure presumably does not | |
| # make `failure()` true here -- confirm whether the Windows | |
| # trace is meant to be uploaded. | |
| if: failure() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: test-trace-${{ matrix.os }} | |
| path: /tmp/test_trace.log | |
| retention-days: 7 | |
| # An earlier step can fail before the Test step ever creates | |
| # the trace file; stay quiet in that case. | |
| if-no-files-found: ignore | |
| - name: Release gate | |
| # Composite gate, run on every non-Windows matrix entry: | |
| # check-reloc-mirror, check-stencil-registry, the test suite | |
| # under ASan, and 4-way JIT parity (auto / on / off / lean). | |
| # Windows is excluded because the gate's ASan step needs a | |
| # libsanitizer that mingw doesn't ship. | |
| # | |
| # `check-stencils-fresh` is deliberately left out of the gate. | |
| # It regenerates stencils with the host `cc`, so the committed | |
| # bytes would have to byte-match whatever toolchain each | |
| # runner ships. Dev (Apple clang 17), macos-14 (Apple clang | |
| # 15), and ubuntu-24.04 (gcc, no musttail support) differ | |
| # enough that a byte check cannot hold across the matrix. | |
| # Stale-commit hygiene is a dev pre-commit step instead; CI | |
| # correctness comes from the test suite + ASan + 4-way parity, | |
| # which catch the actual runtime impact of a stale stencil | |
| # regardless of compiler. | |
| if: ${{ matrix.os != 'windows-2022' }} | |
| timeout-minutes: 12 | |
| run: ./mino task release-gate | |
| # NOTE: a previous workflow had a `cross-compile` job here that | |
| # regenerated each target's stencil header on macos-14 and diffed | |
| # against committed bytes. It was removed when the dev / CI clang | |
| # version split (Apple clang 17 locally vs Apple clang 15 on | |
| # macos-14) made byte-identity infeasible across hosts. Stale- | |
| # commit hygiene is now a dev pre-commit step | |
| # (`./mino task check-stencils-fresh`); CI correctness is gated by | |
| # the test suite, ASan, and 4-way JIT parity, which catch the | |
| # runtime impact of a stale stencil regardless of compiler version. | |
| external-test-suite: | |
| name: external-test-suite (clojure-test-suite) | |
| runs-on: ubuntu-latest | |
| needs: build | |
| # The external test suite is a snapshot of jank-lang/clojure-test-suite | |
| # run against the current mino build. Each .cljc file runs in its own | |
| # `./mino` sub-process under a 30s timeout (handled by the driver), | |
| # so a segfault or hang in one file doesn't take down the rest. | |
| # Informational: the job reports the supported / failed / errored | |
| # counts but does not gate merges. | |
| continue-on-error: true | |
| timeout-minutes: 20 | |
| steps: | |
| # Both repositories are checked out side by side under the | |
| # workspace: ./mino and ./clojure-test-suite. | |
| - uses: actions/checkout@v4 | |
| with: | |
| path: mino | |
| - name: Checkout clojure-test-suite | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: jank-lang/clojure-test-suite | |
| path: clojure-test-suite | |
| - name: Bootstrap mino | |
| working-directory: mino | |
| run: make | |
| - name: Run external suite | |
| working-directory: mino | |
| # The driver expects the suite as a sibling of mino/, which the | |
| # checkout layout above already gives us. tee the output so the | |
| # next step can pull the summary without re-running the suite. | |
| # pipefail makes the step report the suite's exit status rather | |
| # than tee's. | |
| run: | | |
| set -o pipefail | |
| ./mino tests/clojure_test_suite.clj 2>&1 \ | |
| | tee /tmp/external_suite.log | |
| - name: Surface aggregate counts | |
| # Pull the summary block out of the captured log and post it as | |
| # the job summary so the Actions UI shows pass / fail counts | |
| # without scrolling. Runs even when the suite step failed. If | |
| # the report block is missing (the suite died before printing | |
| # it, or the log was never written), post an explicit | |
| # placeholder instead of an empty code fence. | |
| if: always() | |
| run: | | |
| report=$(sed -n '/EXTERNAL TEST SUITE REPORT/,/Per-file raw results/p' \ | |
| /tmp/external_suite.log 2>/dev/null \ | |
| | head -100 || true) | |
| { | |
| echo "## External test suite summary" | |
| echo '' | |
| echo '```' | |
| if [ -n "$report" ]; then | |
| printf '%s\n' "$report" | |
| else | |
| echo '(no report block found in the suite output)' | |
| fi | |
| echo '```' | |
| } >> "$GITHUB_STEP_SUMMARY" | |
| perf-gate: | |
| name: perf-gate (linux) | |
| needs: build | |
| runs-on: ubuntu-latest | |
| # Informational only: shared GitHub-hosted runners are CPU-noisy, | |
| # and the image itself drifts because ubuntu-latest tracks the | |
| # newest stable image. The authoritative perf signal remains local | |
| # runs and the dedicated mino-bench workflow. | |
| continue-on-error: true | |
| steps: | |
| # This repository at the SHA under test, parked in ./mino-head. | |
| - name: Checkout mino at current SHA | |
| uses: actions/checkout@v4 | |
| with: | |
| path: mino-head | |
| # The benchmark harness, including its own submodules. | |
| - name: Checkout mino-bench | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: leifericf/mino-bench | |
| submodules: recursive | |
| path: mino-bench | |
| # Swap the harness's `mino` directory for a copy of the checkout | |
| # under test, so the benchmarks exercise this SHA. | |
| - name: Override mino submodule to this SHA | |
| working-directory: mino-bench | |
| run: | | |
| rm -rf mino | |
| cp -R ../mino-head mino | |
| # Build ./mino inside the harness tree. | |
| - name: Bootstrap mino | |
| working-directory: mino-bench/mino | |
| run: make | |
| # The freshly built mino drives the harness's own tasks. | |
| - name: Build bench binaries | |
| working-directory: mino-bench | |
| run: ./mino/mino task build | |
| - name: Run perf gate | |
| working-directory: mino-bench | |
| run: ./mino/mino task perf-gate | |