Skip to content

Use pooch for dataset downloads (#244) #291

Use pooch for dataset downloads (#244)

Use pooch for dataset downloads (#244) #291

Workflow file for this run

# Workflow identity, triggers and run de-duplication.
name: Run Notebooks

# Triggered by pushes to main and by pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Runs are grouped per workflow and git ref; starting a new run in a group
# cancels the run that is still in progress there.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Executes every tutorial notebook end-to-end, one matrix job per notebook.
  run:
    runs-on: ubuntu-latest
    strategy:
      # GitHub's default (fail-fast: true) cancels the remaining matrix jobs as
      # soon as one fails. Disable it so each notebook reports its own result
      # and a single failing notebook does not stop the others from finishing
      # (and from saving their dataset cache).
      fail-fast: false
      matrix:
        notebook:
          - "docs/tutorials/getting_started.ipynb"
          - "docs/tutorials/real_dataset_example_physionet2019.ipynb"
          - "docs/tutorials/omop_intro.ipynb"
          - "docs/tutorials/omop_ml.ipynb"
          - "docs/tutorials/interactive_visualization.ipynb"
    steps:
      - uses: actions/checkout@v5
        with:
          submodules: "true"

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          # Quoted so the version is never read as a float.
          python-version: "3.14"

      - name: Install UV
        run: pip install uv

      # Folded scalar: the lines below are joined with spaces into one command.
      - name: Install ehrdata and additional dependencies
        run: >-
          uv pip install --system .
          nbconvert ipykernel graphviz torch ehrapy "vitessce[all]"
          bottleneck ome_zarr negspy

      # Restore example datasets so flaky/blocked upstream hosts
      # (physionet.org etc.) don't break the tutorial runs.
      # Keyed per notebook because each notebook downloads a different subset;
      # bump the version suffix on URL changes.
      - name: Restore datasets cache
        id: data-cache
        uses: actions/cache/restore@v4
        with:
          path: docs/tutorials/ehrapy_data
          key: ehrapy-data-notebook-${{ matrix.notebook }}-v2

      # The notebook path is handed to the shell through an environment
      # variable and quoted there, rather than being expanded into the script
      # text, so it is always treated as a single literal argument.
      - name: Run ${{ matrix.notebook }} Notebook
        env:
          NOTEBOOK: ${{ matrix.notebook }}
        run: jupyter nbconvert --to notebook --execute "$NOTEBOOK"

      # Only persist after a green run that had to download (cache miss), so a
      # partial/failed download is never saved under the (immutable) key.
      # The key must stay identical to the one used by the restore step.
      - name: Save datasets cache
        if: steps.data-cache.outputs.cache-hit != 'true' && success()
        uses: actions/cache/save@v4
        with:
          path: docs/tutorials/ehrapy_data
          key: ehrapy-data-notebook-${{ matrix.notebook }}-v2