From 6bc4043d542cb07428a0e9f8f228f1e527f18e68 Mon Sep 17 00:00:00 2001
From: Thomas S
Date: Mon, 16 Jun 2025 11:53:59 +0200
Subject: [PATCH] ci: Add end-to-end workflow

---
 .github/workflows/end-to-end.yml             | 127 +++++++++++++++++++
 .github/workflows/lint.yml                   |   2 +-
 .pre-commit-config.yaml                      |  10 +-
 ci/tests/end-to-end/conftest.py              |  71 +++++++++++
 ci/tests/end-to-end/test_import.py           | 101 +++++++++++++++
 sphinx/sphinxext/matplotlib_skore_scraper.py |   2 +-
 6 files changed, 305 insertions(+), 8 deletions(-)
 create mode 100644 .github/workflows/end-to-end.yml
 create mode 100644 ci/tests/end-to-end/conftest.py
 create mode 100644 ci/tests/end-to-end/test_import.py

diff --git a/.github/workflows/end-to-end.yml b/.github/workflows/end-to-end.yml
new file mode 100644
index 0000000000..1e1c727ce5
--- /dev/null
+++ b/.github/workflows/end-to-end.yml
@@ -0,0 +1,127 @@
+name: end-to-end
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions: {}
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  PACKAGES: '["skore","skore-hub-project","skore-local-project"]'
+
+jobs:
+  end-to-end-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      modified-packages: ${{ steps.filter.outputs.changes }}
+    permissions:
+      pull-requests: read
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Dynamically create the filtering file, based on `env.PACKAGES`
+        id: setup
+        run: |
+          echo "${PACKAGES}" | jq -r '.[]' | while read package; do
+            >>${FILEPATH} echo "${package}:
+          - '.github/workflows/end-to-end.yml'
+          - 'ci/requirements/${package}/**'
+          - '${package}/**'"
+          done
+        env:
+          FILEPATH: ${{ runner.temp }}/filters.yaml
+
+      - name: Determine if at least one file has changed
+        id: filter
+        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
+        with:
+          filters: ${{ runner.temp }}/filters.yaml
+
+
+  end-to-end:
+    runs-on: ubuntu-latest
+    needs: [end-to-end-changes]
+    if: ${{ (github.event_name == 'push') || (needs.end-to-end-changes.outputs.modified-packages != '[]') }}
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Setup Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        id: setup-python
+        with:
+          python-version: 3.13
+          check-latest: True
+          cache: pip
+
+      - name: Restore python-venv
+        uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        id: cache-python-venv
+        with:
+          path: venv/
+          key: >-
+            python-venv
+            -ubuntu-latest
+            -3.13
+            -${{ hashFiles('ci/requirements/skore/python-3.13/scikit-learn-1.7/test-requirements.txt') }}
+            -${{ hashFiles('ci/requirements/skore-hub-project/python-3.13/scikit-learn-1.7/test-requirements.txt') }}
+            -${{ hashFiles('ci/requirements/skore-local-project/python-3.13/scikit-learn-1.7/test-requirements.txt') }}
+
+      - name: Setup python-venv
+        run: |
+          set -eu
+
+          # Ensure venv is created
+          python -m venv venv
+
+          # Activate venv for each step depending on the OS
+          echo "${GITHUB_WORKSPACE}/venv/bin" >> ${GITHUB_PATH}
+          echo "VIRTUAL_ENV=${GITHUB_WORKSPACE}/venv" >> ${GITHUB_ENV}
+
+      - name: Install dependencies in python-venv
+        if: steps.cache-python-venv.outputs.cache-hit != 'true'
+        run: |
+          python -m pip install --upgrade pip build
+
+          echo "${PACKAGES}" | jq -r '.[]' | while read package; do
+            python -m pip install --requirement "ci/requirements/${package}/python-3.13/scikit-learn-1.7/test-requirements.txt"
+          done
+
+      - name: Save python-venv
+        uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        if: steps.cache-python-venv.outputs.cache-hit != 'true'
+        with:
+          path: venv/
+          key: ${{ steps.cache-python-venv.outputs.cache-primary-key }}
+
+      - name: Build and install
+        run: |
+          echo "${PACKAGES}" | jq -r '.[]' | while read package; do
+            (
+              cd "${package}/"
+
+              # build
+              python -m build
+
+              # install
+              wheel=(dist/*.whl); python -m pip install --force-reinstall --no-deps "${wheel}"
+            )
+          done
+
+      - name: Test
+        timeout-minutes: 10
+        working-directory: ci/tests/end-to-end
+        run: python -m pytest --import-mode=importlib --no-header --verbosity=2 --dist=loadscope --numprocesses auto --no-cov
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 9a546fb0c8..d7cf012402 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -46,5 +46,5 @@ jobs:
           pre-commit run --all-files trailing-whitespace
           pre-commit run --all-files typos
           pre-commit run --all-files nbstripout
-          pre-commit run --all-files ruff
+          pre-commit run --all-files ruff-check
           pre-commit run --all-files mypy
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 493eeab24f..f85a210de1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
       - id: trailing-whitespace
 
   - repo: https://github.com/crate-ci/typos
-    rev: v1.30.2
+    rev: v1.33.1
     hooks:
      - id: typos
 
@@ -22,16 +22,14 @@ repos:
        args: [--keep-id]
 
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.10
+    rev: v0.11.13
    hooks:
-      - id: ruff
-        files: ^((skore|skore-hub-project|skore-local-project)/(hatch|src|tests))|(examples)/
+      - id: ruff-check
        args: [--fix]
      - id: ruff-format
-        files: ^((skore|skore-hub-project|skore-local-project)/(hatch|src|tests))|(examples)/
 
  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
+    rev: v1.16.1
    hooks:
      - id: mypy
        files: ^(skore|skore-hub-project|skore-local-project)/src/
diff --git a/ci/tests/end-to-end/conftest.py b/ci/tests/end-to-end/conftest.py
new file mode 100644
index 0000000000..503e56dd47
--- /dev/null
+++ b/ci/tests/end-to-end/conftest.py
@@ -0,0 +1,71 @@
+from pytest import fixture
+
+
+def pytest_configure(config):
+    import matplotlib
+
+    # Use a non-interactive ``matplotlib.backend`` that can only write to files.
+    #
+    # https://github.com/matplotlib/matplotlib/issues/29119
+    # https://matplotlib.org/stable/users/explain/figure/backends.html#selecting-a-backend
+    matplotlib.use("agg")
+
+
+@fixture
+def monkeypatch_tmpdir(monkeypatch, tmp_path):
+    """
+    Change ``TMPDIR`` used by ``tempfile.gettempdir()`` to point to ``tmp_path``, so
+    that it is automatically deleted after use, with no impact on user's environment.
+
+    Force the reload of the ``tempfile`` module to change the cached return of
+    ``tempfile.gettempdir()``.
+
+    https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir
+    """
+    import importlib
+    import tempfile
+
+    monkeypatch.setenv("TMPDIR", str(tmp_path))
+    importlib.reload(tempfile)
+
+
+@fixture
+def monkeypatch_skrub(monkeypatch):
+    """
+    Make `skrub.TableReport.html_snippet()` reproducible
+
+    https://github.com/skrub-data/skrub/blob/35f573ce586fe61ef2c72f4c0c4b188ebf2e664b/skrub/_reporting/_html.py#L153
+    """
+    monkeypatch.setattr("secrets.token_hex", lambda: "")
+
+
+@fixture
+def monkeypatch_matplotlib(monkeypatch):
+    """
+    Make `matplotlib.Figure.savefig(format="svg")` reproducible
+
+    https://matplotlib.org/stable/users/prev_whats_new/whats_new_2.1.0.html#reproducible-ps-pdf-and-svg-output
+    https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.10.0.html#svg-id-rcparam
+    """
+    import matplotlib
+
+    monkeypatch.setenv("SOURCE_DATE_EPOCH", "0")
+
+    matplotlib_rcparams = matplotlib.rcParams.copy()
+    matplotlib.rcParams["svg.hashsalt"] = ""
+
+    if "svg.id" in matplotlib.rcParams:
+        matplotlib.rcParams["svg.id"] = ""
+
+    try:
+        yield
+    finally:
+        matplotlib.rcParams = matplotlib_rcparams
+
+
+@fixture(autouse=True)
+def setup(
+    monkeypatch_tmpdir,
+    monkeypatch_matplotlib,
+    monkeypatch_skrub,
+): ...
diff --git a/ci/tests/end-to-end/test_import.py b/ci/tests/end-to-end/test_import.py
new file mode 100644
index 0000000000..945094bae4
--- /dev/null
+++ b/ci/tests/end-to-end/test_import.py
@@ -0,0 +1,101 @@
+from pytest import fixture
+from sklearn.datasets import make_classification, make_regression, fetch_openml
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.model_selection import train_test_split
+from skrub import tabular_learner
+from sklearn.model_selection import GridSearchCV
+
+
+@fixture(scope="module")
+def regression():
+    X, y = make_regression(random_state=42)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    return {
+        "estimator": LinearRegression(),
+        "X_train": X_train,
+        "y_train": y_train,
+        "X_test": X_test,
+        "y_test": y_test,
+    }
+
+
+@fixture(scope="module")
+def classification():
+    X, y = make_classification(n_classes=2, random_state=42)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    return {
+        "estimator": LogisticRegression(),
+        "X_train": X_train,
+        "y_train": y_train,
+        "X_test": X_test,
+        "y_test": y_test,
+    }
+
+
+@fixture(scope="module")
+def gridsearch():
+    X, y = fetch_openml("adult", version=2, as_frame=True, return_X_y=True)
+    y = 1 * (y == ">50K")
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+
+    return {
+        "estimator": GridSearchCV(
+            estimator=tabular_learner("classification"),
+            param_grid={
+                "histgradientboostingclassifier__learning_rate": [0.01, 0.1, 0.2],
+                "histgradientboostingclassifier__max_depth": [1, 3, 5],
+                "histgradientboostingclassifier__max_leaf_nodes": [30, 60, 90],
+            },
+            cv=5,
+            n_jobs=-1,
+            refit=True,
+            scoring="neg_log_loss",
+        ),
+        "X_train": X_train,
+        "y_train": y_train,
+        "X_test": X_test,
+        "y_test": y_test,
+    }
+
+
+def test_put_with_local_project(tmp_path, regression, classification, gridsearch):
+    import skore
+    import skore_local_project
+
+    project = skore.Project("", workspace=tmp_path)
+
+    assert isinstance(project._Project__project, skore_local_project.Project)
+    assert project.mode == "local"
+    assert project.name == ""
+    assert project._Project__project.workspace == tmp_path
+    assert project._Project__project.name == ""
+
+    project.put("regression", skore.EstimatorReport(**regression))
+    project.put("classification", skore.EstimatorReport(**classification))
+    project.put("gridsearch", skore.EstimatorReport(**gridsearch))
+
+
+def test_simili_put_with_hub_project(regression, classification, gridsearch):
+    import skore
+    import skore_hub_project
+
+    project = skore.Project("hub:///")
+
+    assert isinstance(project._Project__project, skore_hub_project.Project)
+    assert project.mode == "hub"
+    assert project.name == ""
+    assert project._Project__project.tenant == ""
+    assert project._Project__project.name == ""
+
+    for xp in (regression, classification, gridsearch):
+        item = skore_hub_project.item.object_to_item(skore.EstimatorReport(**xp))
+
+        assert item.__metadata__
+        assert item.__representation__
+        assert item.__parameters__
diff --git a/sphinx/sphinxext/matplotlib_skore_scraper.py b/sphinx/sphinxext/matplotlib_skore_scraper.py
index 8884868bf2..d3e674a481 100644
--- a/sphinx/sphinxext/matplotlib_skore_scraper.py
+++ b/sphinx/sphinxext/matplotlib_skore_scraper.py
@@ -1,7 +1,7 @@
 from sphinx_gallery.scrapers import matplotlib_scraper
 
 
-class matplotlib_skore_scraper: # defining matplotlib scraper as a class not a function
+class matplotlib_skore_scraper:  # defining matplotlib scraper as a class not a function
     def __call__(self, *args, **kwargs):
         kwargs.setdefault("bbox_inches", "tight")
         return matplotlib_scraper(*args, **kwargs)
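
Note for reviewers (not part of the patch): the new workflow can be approximated locally. The following is a minimal sketch derived from the steps above, assuming a POSIX shell run from the repository root, Python 3.13 and jq available, and that the pinned test-requirements files provide pytest together with the xdist and cov plugins used by the Test step; the plain loop over package names stands in for the jq iteration over `env.PACKAGES`.

    # Create and activate a throwaway virtual environment, as the "Setup python-venv" step does
    python -m venv venv && source venv/bin/activate
    python -m pip install --upgrade pip build

    # Install the pinned test requirements, then build and install each package wheel
    for package in skore skore-hub-project skore-local-project; do
        python -m pip install --requirement "ci/requirements/${package}/python-3.13/scikit-learn-1.7/test-requirements.txt"
        (cd "${package}/" && python -m build && python -m pip install --force-reinstall --no-deps dist/*.whl)
    done

    # Run the end-to-end suite with the same flags as the "Test" step
    cd ci/tests/end-to-end
    python -m pytest --import-mode=importlib --no-header --verbosity=2 --dist=loadscope --numprocesses auto --no-cov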