Skip to content

backend: centralize data-coercion helpers into galpy.backend._coerce … #116

backend: centralize data-coercion helpers into galpy.backend._coerce …

backend: centralize data-coercion helpers into galpy.backend._coerce … #116

Workflow file for this run

name: All-backend tests
# =============================================================================
# Pillar 1: run galpy's EXISTING test suite under the array-API backends
# (--backend=jax and --backend=torch), kept GREEN via the checked-in
# xfail-ledger (tests/backend_xfail.txt + the pytest_collection_modifyitems hook
# in tests/conftest.py). This lives in its OWN workflow (separate from
# build.yml's numpy matrix and from the existing test_backend*.py jax/torch job)
# so the numpy CI is byte-for-byte untouched.
#
# Triggers / gating (deliberately NOT every push/PR in the repo):
# * push to feat/backends -> FULL matrix (backend-suite)
# * schedule (weekly cron) -> FULL matrix (backend-suite)
# * workflow_dispatch (regen toggle) -> FULL matrix (backend-suite)
# * pull_request targeting feat/backends -> FULL matrix (backend-suite)
# so a sub-PR into the integration branch gets the COMPLETE per-file x
# backend table before merge (this is what populates the status report on a
# PR). The full matrix is gated with a job-level `if` so that even though the
# workflow fires on pull_request, the suite ONLY runs for PRs whose BASE is
# feat/backends (and only on push/schedule/dispatch otherwise).
#
# The `report` job (needs backend-suite, if: always()) downloads every shard's
# JUnit XML artifact, renders a per-file x backend markdown status table via
# tests/backend_status_report.py, writes it to the run's step-summary, uploads
# it as the `backend-status-report` artifact, and posts it as a STICKY PR
# comment (overwritten on each re-run) so the burndown is visible inline: on
# the triggering PR for a pull_request run, or on the open feat/backends ->
# main integration PR for a push to feat/backends.
# =============================================================================
# Workflow triggers: pushes to and sub-PRs against the integration branch, a
# weekly cron, and a manual dispatch with an optional ledger-regen toggle.
on:
  push:
    branches: [feat/backends]
  pull_request:
    # Restricted to PRs whose base is the integration branch, so only those get
    # the full per-file backend table before merge -- not every PR in the repo.
    branches: [feat/backends]
  schedule:
    # Tuesdays at 20:00 UTC.
    - cron: "0 20 * * 2"
  # Manual runs execute the full all-backend suite on demand; with regen=true
  # the backend-suite job sets GALPY_BACKEND_XFAIL_REGEN=1 so the backend
  # xfail-ledger is regenerated from a real run.
  workflow_dispatch:
    inputs:
      regen:
        description: "Regenerate the backend xfail-ledger (writes backend_xfail_new.txt as an artifact instead of xfailing)"
        type: boolean
        required: false
        default: false
# Least privilege: workflow-wide, the token only needs to read the repository.
# The `report` job -- the only job that posts the sticky PR comment -- declares
# its own job-level `pull-requests: write`, and job-level permissions replace
# these defaults. The backend-suite shards, which install and run the
# checked-out code, therefore never hold a token that can write to PRs.
permissions:
  contents: read
jobs:
# ===========================================================================
# Full all-backend existing-suite job (the heavy "frequent but cheap" run).
#
# Mirrors the FULL set of ubuntu TEST_FILES shards from build.yml's numpy
# `build` matrix (same files, same REQUIRES_* dependency provisioning, same
# GSL/torus/C-extension build, same special .galpyrc configs) -- crossed with
# BACKEND in {jax, torch}, CPU-only. The checked-in xfail-ledger keeps the run
# GREEN via xfail(strict=False): a ledgered test is green whether it fails OR
# (flakily) passes, so the slow-jax tests that flip pass<->300s-timeout no
# longer red the run. Only a genuinely un-ledgered failure/error reds it (still
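# catches regressions). Burndown -- both adding newly-failing and dropping
# now-passing tests -- is done by a regen run (a manual workflow_dispatch with
# regen=true; push, pull_request and schedule runs keep
# GALPY_BACKEND_XFAIL_REGEN=0), which regenerates the ledger from real
# (no-xfail) outcomes and uploads it as a backend-xfail-new-* artifact.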
#
# GATED to run on push-to-feat/backends, the weekly schedule, a manual
# workflow_dispatch, OR a pull_request whose BASE is feat/backends (so a sub-PR
# into the integration branch gets the full per-file table before merge).
# Because the whole workflow only fires for feat/backends (push/PR branch
# filters above), it never runs on ordinary pushes/PRs elsewhere in the repo;
# the job-level `if` additionally pins the PR case to base==feat/backends.
# Job: one shard of the existing test suite under one backend (jax or torch).
backend-suite:
  name: backend ${{ matrix.BACKEND }} ${{ matrix.TEST_FILES }}
  runs-on: ubuntu-latest
  # Final backstop above the per-test and per-session pytest timeouts: cap the
  # whole shard so an accumulation of slow tests cannot keep the report job
  # waiting.
  timeout-minutes: 90
  # Run for the weekly schedule, a manual dispatch, a push to feat/backends, or
  # a pull_request whose base branch is feat/backends.
  if: >-
    github.event_name == 'schedule'
    || github.event_name == 'workflow_dispatch'
    || (github.event_name == 'push' && github.ref == 'refs/heads/feat/backends')
    || (github.event_name == 'pull_request' && github.base_ref == 'feat/backends')
# Job fan-out: the 18 TEST_FILES entries below crossed with the 2 BACKEND
# values give 36 jobs. The TEST_FILES strings are passed verbatim to pytest
# (including any -k / --splits options) and are also the keys the `include`
# entries match on, so each string under `include` must stay character-for-
# character identical to its entry in the TEST_FILES list.
strategy:
# Do not cancel the remaining shards when one shard fails.
fail-fast: false
matrix:
# Both axes are genuine cross-product axes -> every TEST_FILES shard
# runs under BOTH backends (BACKEND x TEST_FILES). The per-shard
# REQUIRES_* dependency flags are attached below via `include`, which
# MERGES into each matching {BACKEND, TEST_FILES} combination (so a flag
# set on a TEST_FILES value applies to both the jax and torch run of
# that shard). This mirrors the FULL ubuntu TEST_FILES sharding of
# build.yml's numpy `build` matrix. The test_backend*.py shard is
# intentionally omitted: those tests already run under BOTH jax and
# torch in the numpy build (and several manage their own backend). The
# duplicated multi-python / macOS orbit shards are not repeated (one
# ubuntu orbit shard each).
BACKEND: [jax, torch]
TEST_FILES:
- tests/test_actionAngle.py
- tests/test_sphericaldf.py
# test_actionAngleTorus.py is EXCLUDED from the all-backend goal (it
# wraps the external C++ Torus library -- never backend-agnostic).
- tests/test_conversion.py tests/test_galpypaper.py tests/test_import.py tests/test_interp_potential.py tests/test_kuzminkutuzov.py tests/test_util.py
- tests/test_SpiralArmsPotential.py tests/test_scf.py tests/test_MultipoleExpansionPotential.py tests/test_snapshotpotential.py
# test_potential.py: #977 parametrized its per-potential loop-tests
# (~1300 jax cases) -> serial jax (~3h) overflows the 75-min session-
# timeout. Split into 5 serial sub-shards via pytest-split (single
# process: partitions the collected tests, no pickling -- xdist can't
# pickle several mock/C-backed potentials). ~44min/jax group.
- tests/test_potential.py --splits 5 --group 1
- tests/test_potential.py --splits 5 --group 2
- tests/test_potential.py --splits 5 --group 3
- tests/test_potential.py --splits 5 --group 4
- tests/test_potential.py --splits 5 --group 5
- tests/test_quantity.py tests/test_coords.py
- tests/test_orbit.py -k 'test_energy_jacobi_conservation or from_name'
- tests/test_orbit.py tests/test_orbits.py -k 'not test_energy_jacobi_conservation'
- tests/test_evolveddiskdf.py
- tests/test_jeans.py tests/test_dynamfric.py tests/test_FDMdynamfric.py
- tests/test_qdf.py tests/test_pv2qdf.py tests/test_streamgapdf_impulse.py tests/test_noninertial.py
- tests/test_streamgapdf.py
- tests/test_diskdf.py
# useTM=True streamdf tests need actionAngleTorus (external C++ Torus
# lib) -> out of scope for non-numpy backends; deselected here.
- tests/test_streamdf.py tests/test_streamspraydf.py tests/test_streamTrack.py -k 'not useTM'
# Attach the per-shard dependency flags. An include entry whose
# TEST_FILES matches an existing combination MERGES its extra keys into
# that combination (for BOTH backends); it does NOT add a new job. Only
# the shards that need an extra are listed; for the rest the flag is left
# unset, which the step-level `if:` conditions treat as false.
include:
- TEST_FILES: tests/test_SpiralArmsPotential.py tests/test_scf.py tests/test_MultipoleExpansionPotential.py tests/test_snapshotpotential.py
REQUIRES_PYNBODY: true
# all 5 test_potential split-groups need pynbody (the SnapshotRZPotential
# tests may land in any group under pytest-split's partition).
- TEST_FILES: tests/test_potential.py --splits 5 --group 1
REQUIRES_PYNBODY: true
- TEST_FILES: tests/test_potential.py --splits 5 --group 2
REQUIRES_PYNBODY: true
- TEST_FILES: tests/test_potential.py --splits 5 --group 3
REQUIRES_PYNBODY: true
- TEST_FILES: tests/test_potential.py --splits 5 --group 4
REQUIRES_PYNBODY: true
- TEST_FILES: tests/test_potential.py --splits 5 --group 5
REQUIRES_PYNBODY: true
- TEST_FILES: tests/test_quantity.py tests/test_coords.py
REQUIRES_ASTROPY: true
# Both orbit shards need pynbody, astropy and astroquery; which astroquery
# release gets installed is decided per shard by the install steps.
- TEST_FILES: tests/test_orbit.py -k 'test_energy_jacobi_conservation or from_name'
REQUIRES_PYNBODY: true
REQUIRES_ASTROPY: true
REQUIRES_ASTROQUERY: true
- TEST_FILES: tests/test_orbit.py tests/test_orbits.py -k 'not test_energy_jacobi_conservation'
REQUIRES_PYNBODY: true
REQUIRES_ASTROPY: true
REQUIRES_ASTROQUERY: true
- TEST_FILES: tests/test_qdf.py tests/test_pv2qdf.py tests/test_streamgapdf_impulse.py tests/test_noninertial.py
REQUIRES_NUMBA: true
# Job-wide environment shared by every step of a shard.
env:
  MPLBACKEND: "Agg"
  # Both backends run CPU-only on the CI runner.
  JAX_PLATFORMS: "cpu"
  CUDA_VISIBLE_DEVICES: ""
  # Ledger-hook toggle: '1' only for a manual workflow_dispatch with
  # regen=true, '0' for every other trigger.
  GALPY_BACKEND_XFAIL_REGEN: ${{ github.event_name == 'workflow_dispatch' && inputs.regen && '1' || '0' }}
  # Where the regenerated ledger is written: inside the CI workspace (rather
  # than the conftest hook's default local-worktree path) so upload-artifact
  # can pick it up. The filename carries the backend so the jax and torch runs
  # never write the same file.
  GALPY_BACKEND_XFAIL_OUT: ${{ github.workspace }}/backend_xfail_new_${{ matrix.BACKEND }}.txt
steps:
  # Check out the repository under test.
  - uses: actions/checkout@v6
  - name: Set up Python
    uses: actions/setup-python@v6
    with:
      # Quoted so YAML keeps the version a string instead of a float.
      python-version: '3.14'
      allow-prereleases: true
# Refresh the package index first: the runner image ships an index snapshot
# that can reference package versions no longer on the mirror, which makes a
# bare install fail intermittently. -y keeps the install non-interactive
# without depending on the image's apt configuration.
- name: Install the GSL
  run: |
    sudo apt-get update
    sudo apt-get install -y libgsl-dev
- name: Install Python dependencies
  # pytest-timeout backs the pytest step's --timeout options: any single test
  # running >300 s is killed, so a hung backend trace (e.g. the SCFPotential
  # jax XLA-graph blow-up) is recorded as a FAILURE instead of stalling the
  # whole run. pytest-split backs the --splits/--group options used by the
  # test_potential shards.
  run: >-
    pip install --upgrade --upgrade-strategy eager
    numpy scipy matplotlib numexpr setuptools cython tqdm
    pytest-timeout pytest-split
# Only for shards whose matrix entry sets REQUIRES_PYNBODY.
- name: Install pynbody
  if: matrix.REQUIRES_PYNBODY
  run: |
    # Prerequisites first, then pynbody itself with gcc-12/g++-12 selected for
    # that one pip invocation only.
    pip install --upgrade --upgrade-strategy eager h5py pandas pytz
    pip install --upgrade --upgrade-strategy eager wheel
    CC=gcc-12 CXX=g++-12 pip install --upgrade --upgrade-strategy eager pynbody
- name: Install astropy
  if: matrix.REQUIRES_ASTROPY
  run: pip install astropy pyerfa
# The two astroquery steps are mutually exclusive: each keys on the -k
# expression embedded in its orbit shard's TEST_FILES string, so one orbit
# shard gets the latest release and the other a pre-release.
- name: Install astroquery (latest release)
  if: matrix.REQUIRES_ASTROQUERY && contains(matrix.TEST_FILES, 'test_energy_jacobi_conservation or from_name')
  run: pip install astroquery
- name: Install astroquery (bleeding edge)
  if: matrix.REQUIRES_ASTROQUERY && contains(matrix.TEST_FILES, 'not test_energy_jacobi_conservation')
  run: pip install --pre astroquery
# Only for shards whose matrix entry sets REQUIRES_NUMBA.
- name: Install numba
  if: matrix.REQUIRES_NUMBA
  run: |
    pip install numba
    # Installing numba may force an older setuptools, which is not actually a
    # *runtime* requirement (numba/numba#8366) -- so put the latest one back.
    pip install --upgrade --force-reinstall setuptools
# Exactly one of the next two steps runs, selected by the shard's BACKEND.
- name: Install JAX
  if: matrix.BACKEND == 'jax'
  run: pip install jax jaxlib array-api-compat diffrax
- name: Install PyTorch (CPU)
  if: matrix.BACKEND == 'torch'
  run: |
    # torch comes from the CPU-only wheel index; the helper packages come
    # from the default index.
    pip install torch --index-url https://download.pytorch.org/whl/cpu
    pip install array-api-compat torchdiffeq
# NOTE: the external C++ Torus library is deliberately NOT built in this job.
# galpy imports and works fully without it; only actionAngleTorus needs it,
# and that is out of scope for the backend goal (test_actionAngleTorus.py is
# excluded and the useTM=True streamdf tests are deselected). Leaving it out
# speeds up every shard.
- name: Install package
  env:
    GALPY_COMPILE_NO_OPENMP: "1"
    GALPY_COMPILE_SINGLE_EXT: "1"
  run: |
    # Editable install with the test extra, then build the extensions in place.
    python -m pip install --no-build-isolation --verbose --editable ".[test]"
    python setup.py build_ext --inplace
- name: Special config
  env:
    TEST_FILES: ${{ matrix.TEST_FILES }}
  run: |
    # Two shards run against a non-default ~/.galpyrc; every other shard leaves
    # the configuration untouched. The patterns are quoted, so they match the
    # shard string literally.
    case "$TEST_FILES" in
      'tests/test_evolveddiskdf.py')
        printf '%s\n' \
          '[normalization]' \
          'ro = 8.' \
          'vo = 220.' > "$HOME/.galpyrc"
        ;;
      'tests/test_diskdf.py')
        printf '%s\n' \
          '[normalization]' \
          'ro = 8.' \
          '[astropy]' \
          'astropy-units = False' \
          '[plot]' \
          'seaborn-bovy-defaults = True' \
          '[warnings]' \
          'verbose = True' > "$HOME/.galpyrc"
        ;;
    esac
- name: Compute shard id + junit path
  # tests/backend_status_report.py --id is the single source of truth for the
  # sanitized shard id. It reads $TEST_FILES from the environment, so the
  # embedded -k '...' needs no shell quoting. The report job recomputes the
  # SAME ids to map each artifact back to a (backend, shard) table cell.
  env:
    TEST_FILES: ${{ matrix.TEST_FILES }}
    BACKEND: ${{ matrix.BACKEND }}
  run: |
    shard_id="$(python tests/backend_status_report.py --id)"
    # Export both values to the later steps of this job.
    {
      echo "SHARD_ID=${shard_id}"
      echo "JUNIT_PATH=${GITHUB_WORKSPACE}/backend-junit-${BACKEND}-${shard_id}.xml"
    } >> "$GITHUB_ENV"
# Runs the shard's tests under the selected backend. This step never fails on
# its own (the pipeline ends in `|| true`); pass/fail is decided later from the
# junit XML it writes to $JUNIT_PATH.
- name: Test with pytest (backend=${{ matrix.BACKEND }})
env:
TEST_FILES: ${{ matrix.TEST_FILES }}
BACKEND: ${{ matrix.BACKEND }}
run: |
# -rxX reports XFAIL/XPASS; junit xml drives the burndown summary and the
# report job's per-file table. Do NOT pass -W error here: under a backend
# the existing suite emits many benign warnings that are not part of the
# ledger contract. `|| true` so a non-zero exit (e.g. an un-ledgered
# failure) still reaches the burndown summary step; the explicit gate in
# the re-fail step re-fails the job when appropriate.
# --timeout=300: kill any single test that runs >300 s and record it as a
# FAILURE instead of letting it hang the shard (the SCFPotential/DiskSCF/
# Multipole jax XLA-graph blow-up -- a Python-unrolled harmonic sum --
# otherwise stalls the run for hours so the `report` job never posts).
# --timeout-method=signal (NOT thread): the jax hang is in Python-level
# XLA tracing, which is interruptible at a bytecode boundary, so signal
# raises Timeout in the test, the test is recorded as a normal FAILED in
# the junit XML, AND the session CONTINUES to the remaining tests. The
# thread method instead os._exit()s the whole pytest process on timeout,
# which kills it BEFORE the junit XML is written -> the shard produces no
# junit at all (report cell "(no result)", nothing recorded). signal is
# therefore what actually meets the "killed and recorded as a FAILURE"
# goal here. --session-timeout=4500 is a hard 75-min ceiling on the whole
# shard in case a future test hangs in a signal-uninterruptible C call;
# the job-level timeout-minutes: 90 is the final backstop above that.
# NOTE: because the pipeline below ends in `|| true`, pipefail does not
# change this step's outcome; the step succeeds whatever pytest returns.
set -o pipefail
# eval re-parses the command line so that the quoted -k '...' expression
# embedded in some $TEST_FILES values reaches pytest as ONE argument.
# $TEST_FILES comes from this workflow's own matrix, not from user input.
eval "pytest -p no:cacheprovider -v $TEST_FILES \
--backend=$BACKEND \
--timeout=300 --timeout-method=signal --session-timeout=4500 \
-rxX --junitxml=${JUNIT_PATH} \
--disable-pytest-warnings" | tee backend-pytest.log || true
# Always-run step: condenses the shard's junit XML into a one-line tally for
# this backend and appends it to the job's step summary.
- name: Backend burndown summary
if: ${{ always() }}
env:
BACKEND: ${{ matrix.BACKEND }}
run: |
# The inline Python script is read from stdin (`python -`) and gets the
# backend name and junit path as argv[1] / argv[2]. The quoted 'PY' delimiter
# stops the shell from expanding anything inside the script; everything the
# script prints goes to $GITHUB_STEP_SUMMARY.
python - "$BACKEND" "$JUNIT_PATH" <<'PY' >> "$GITHUB_STEP_SUMMARY"
import sys, xml.etree.ElementTree as ET

# Every bucket a testcase can land in; each testcase lands in exactly one.
BUCKETS = ("passed", "xfailed", "xpassed", "failed", "errored", "skipped", "deferred")


def classify(tc):
    """Return the single burndown bucket for one junit <testcase> element.

    Precedence: an <error> child wins, then a <failure> child, then a
    <skipped> child; a testcase with none of them passed. Deciding the bucket
    in one place means a testcase can never be counted twice, so the FAILED
    count cannot be pushed down by an XPASS recount of a testcase that was
    already counted as an ERROR.
    """
    if tc.find("error") is not None:
        return "errored"
    failure = tc.find("failure")
    if failure is not None:
        # XPASS(strict) shows up as a <failure> with an xpass message. Under
        # the strict=False ledger a now-passing ledgered test is a plain pass
        # instead, so this stays 0 on ordinary runs -- burndown candidates
        # come from a regen run, not from per-push XPASS.
        text = ((failure.get("message") or "") + (failure.text or "")).lower()
        return "xpassed" if "xpass" in text else "failed"
    skip = tc.find("skipped")
    if skip is None:
        return "passed"
    # pytest encodes xfail as a <skipped> element. The message/text checks are
    # tried first, in the historical order, so existing results classify
    # exactly as before.
    text = ((skip.get("message") or "") + (skip.text or "")).lower()
    if "xfail" in text:
        return "xfailed"
    if "backend-slow-skip" in text:
        return "deferred"  # unrunnable-until-vectorized (own burndown)
    # pytest also tags an xfail with type="pytest.xfail"; honouring it catches
    # ledger entries whose free-text reason never mentions "xfail".
    if "xfail" in (skip.get("type") or "").lower():
        return "xfailed"
    return "skipped"


def tally(root):
    """Count the testcases under ``root`` per bucket; returns {bucket: int}."""
    counts = dict.fromkeys(BUCKETS, 0)
    for tc in root.iter("testcase"):
        counts[classify(tc)] += 1
    return counts


def summary_line(backend, counts):
    """Render the one-line summary; failures/errors are appended only if any."""
    line = (f"Backend {backend}: {counts['passed']} passed, {counts['xfailed']} xfail (ledger), "
            f"{counts['deferred']} deferred (slow-skip), {counts['xpassed']} XPASS(fix-me)")
    if counts["failed"] or counts["errored"]:
        line += f", {counts['failed']} FAILED, {counts['errored']} ERROR (NOT in ledger)"
    return line


def main(argv):
    """Print the summary for argv[1] (backend) and argv[2] (junit path).

    Always returns 0: this step only reports. A missing or unreadable junit
    file is reported as a line of text rather than as a failure.
    """
    backend, path = argv[1], argv[2]
    try:
        root = ET.parse(path).getroot()
    except Exception as e:
        print(f"Backend {backend}: could not parse junit xml ({e})")
        return 0
    print(summary_line(backend, tally(root)))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
PY
cat "$GITHUB_STEP_SUMMARY"
# Gate step: runs even if earlier steps failed, but is skipped in regen mode
# (GALPY_BACKEND_XFAIL_REGEN == '1'), so a regen run is never failed by this
# gate.
- name: Re-fail on un-ledgered failures
if: ${{ always() && env.GALPY_BACKEND_XFAIL_REGEN != '1' }}
run: |
# The pytest step swallowed its exit code so the summary always renders.
# Re-derive pass/fail from junit: any <failure> or <error> fails the job.
# Under the strict=False ledger a now-passing ledgered test is a plain
# pass (no <failure>), so XPASS never reds the run -- only genuinely
# un-ledgered failures/errors do, which still catches real regressions.
# The inline Python script is read from stdin and gets the junit path as
# argv[1]; its exit status becomes this step's status.
python - "$JUNIT_PATH" <<'PY'
import sys, xml.etree.ElementTree as ET


def count_bad(root):
    """Count <testcase> elements carrying a <failure> or <error> child."""
    return sum(
        1
        for tc in root.iter("testcase")
        if tc.find("failure") is not None or tc.find("error") is not None
    )


def main(argv):
    """Return the gate's exit status for the junit file named in argv[1].

    0 when no testcase failed or errored; 1 otherwise. A missing path or an
    unreadable/corrupt junit file also returns 1 (the shard produced no usable
    result), with an explicit message instead of an uncaught traceback.
    """
    if len(argv) < 2 or not argv[1]:
        print("No junit xml path was provided; treating the shard as failed.")
        return 1
    try:
        root = ET.parse(argv[1]).getroot()
    except (OSError, ET.ParseError) as exc:
        print(f"Could not read junit xml {argv[1]!r} ({exc}); treating the shard as failed.")
        return 1
    bad = count_bad(root)
    if bad:
        print(f"{bad} un-ledgered failures/errors (NOT in the xfail-ledger); see log.")
        return 1
    print("All green: every failure is covered by the xfail-ledger.")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
PY
- name: Upload shard JUnit XML
  # Runs unconditionally (even when the shard FAILS or the pytest step
  # crashed) so the report job can render this cell; a missing artifact shows
  # up as "(no result)". The artifact name embeds BACKEND and the shard id, so
  # the uploads of the different matrix jobs never collide.
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: backend-junit-${{ matrix.BACKEND }}-${{ env.SHARD_ID }}
    path: ${{ env.JUNIT_PATH }}
    # A shard that wrote no XML only produces a warning here.
    if-no-files-found: warn
# Regen mode only: publish the ledger this shard wrote. The artifact name
# carries the backend and the job index, so each matrix job uploads its own.
- name: Upload regenerated ledger (regen mode)
  if: always() && env.GALPY_BACKEND_XFAIL_REGEN == '1'
  uses: actions/upload-artifact@v4
  with:
    name: backend-xfail-new-${{ matrix.BACKEND }}-${{ strategy.job-index }}
    path: backend_xfail_new_${{ matrix.BACKEND }}.txt
    if-no-files-found: warn
# ===========================================================================
# All-backend status report (per-file x backend burndown table).
#
# Downloads every backend-suite shard's JUnit XML artifact and renders a
# markdown table (ROWS = the TEST_FILES shards, COLUMNS = jax / torch) via
# the checked-in tests/backend_status_report.py. The table is written to the
# run's step-summary, uploaded as the `backend-status-report` artifact, and --
# on a pull_request -- posted as a STICKY PR comment (overwritten on each
# re-run via the fixed `header: backend-status` marker).
#
# needs backend-suite + if: always() so the report renders even when some
# shards FAIL or crash; a shard that produced no XML shows as "(no result)"
# rather than failing this job (the report script is robust to missing XML).
# Job: aggregate every shard's junit XML into one per-file x backend table.
report:
  name: backend status report
  runs-on: ubuntu-latest
  needs: backend-suite
  # Render the table even when some shards failed or crashed.
  if: always()
  # Job-scoped token rights: read the repository, write the sticky PR comment.
  permissions:
    contents: read
    pull-requests: write
steps:
  - uses: actions/checkout@v6
  - name: Set up Python
    uses: actions/setup-python@v6
    with:
      python-version: '3.14'
      allow-prereleases: true
  - name: Download all shard JUnit XML artifacts
    # Best effort: a failed download must not stop the table from rendering.
    continue-on-error: true
    uses: actions/download-artifact@v4
    with:
      # Every shard uploaded backend-junit-<backend>-<sid>. The prefix pattern
      # selects exactly those (the regen-ledger artifacts are left out), and
      # each artifact is unpacked into its own sub-directory of
      # junit-artifacts/.
      pattern: 'backend-junit-*'
      path: junit-artifacts
      merge-multiple: false
- name: Render status table
  env:
    GITHUB_SHA: ${{ github.sha }}
  run: |
    # Guarantee the input directory exists (e.g. all shards skipped/crashed),
    # so the script renders an all-"(no result)" table instead of failing.
    mkdir -p junit-artifacts
    python tests/backend_status_report.py junit-artifacts status.md
    # Append the marker line followed by the rendered table to the run summary.
    printf '%s\n' '<!-- backend-status -->' >> "$GITHUB_STEP_SUMMARY"
    cat status.md >> "$GITHUB_STEP_SUMMARY"
# Publish the rendered table as an artifact, whatever the earlier steps did.
- name: Upload status report
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: backend-status-report
    path: status.md
    if-no-files-found: warn
# Post the backend-status comment on the PR that triggered this run. Re-runs
# overwrite the previous comment via the fixed `header` marker, so a PR keeps
# exactly one always-current comment.
# Skipped for PRs opened from a fork: their GITHUB_TOKEN is read-only whatever
# the `permissions` block requests, so the comment call would fail and turn
# the report job red. The table is still in the run's step summary and in the
# `backend-status-report` artifact.
- name: Sticky PR comment (sub-PR targeting feat/backends)
  if: >-
    github.event_name == 'pull_request'
    && github.event.pull_request.head.repo.full_name == github.repository
  uses: marocchino/sticky-pull-request-comment@v2
  with:
    header: backend-status
    path: status.md
- name: Resolve the integration PR (feat/backends -> main)
  # A push to feat/backends carries no PR context, so the open integration PR
  # (head feat/backends, base main) is looked up here in order to comment on
  # it -- WITHOUT a pull_request trigger for base main, which would make every
  # push to feat/backends run the suite twice.
  id: intpr
  if: github.event_name == 'push' && github.ref == 'refs/heads/feat/backends'
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    # First open matching PR's number, or an empty string when there is none.
    pr_number="$(
      gh pr list \
        --repo "$GITHUB_REPOSITORY" \
        --head feat/backends \
        --base main \
        --state open \
        --json number \
        --jq '.[0].number // empty'
    )"
    echo "number=${pr_number}" >> "$GITHUB_OUTPUT"
# Push runs only, and only when the lookup step found an open integration PR:
# post the table there under the same `backend-status` header.
- name: Sticky comment on the integration PR (push to feat/backends)
  if: >-
    github.event_name == 'push'
    && github.ref == 'refs/heads/feat/backends'
    && steps.intpr.outputs.number != ''
  uses: marocchino/sticky-pull-request-comment@v2
  with:
    header: backend-status
    number: ${{ steps.intpr.outputs.number }}
    path: status.md