Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions .github/workflows/end-to-end.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# End-to-end workflow.
#
# Runs on every pull request and on pushes to `main`. A first job computes
# which packages listed in `env.PACKAGES` were touched; a second job builds
# every package, installs the wheels in a cached virtual environment and runs
# the test-suite located in `ci/tests/end-to-end`.
name: end-to-end

on:
  pull_request:
  push:
    branches:
      - main

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# No permission by default; each job declares what it needs.
permissions: {}

defaults:
  run:
    shell: bash

env:
  # JSON array, consumed with `jq` in the steps below.
  PACKAGES: '["skore","skore-hub-project","skore-local-project"]'

jobs:
  end-to-end-changes:
    runs-on: ubuntu-latest
    outputs:
      modified-packages: ${{ steps.filter.outputs.changes }}
    permissions:
      pull-requests: read
    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Create dynamically filtering file, based on `env.PACKAGES`
        id: setup
        # Appends one filter entry per package to `${FILEPATH}`; the nested
        # list items keep their indentation because they are part of the
        # quoted string written by `echo`.
        run: |
          echo "${PACKAGES}" | jq -r '.[]' | while read -r package; do
            >>"${FILEPATH}" echo "${package}:
              - '.github/workflows/end-to-end.yml'
              - 'ci/requirements/${package}/**'
              - '${package}/**'"
          done
        env:
          FILEPATH: ${{ runner.temp }}/filters.yaml

      - name: Define if at least one file has changed
        id: filter
        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
        with:
          filters: ${{ runner.temp }}/filters.yaml

  end-to-end:
    runs-on: ubuntu-latest
    needs: [end-to-end-changes]
    # Always run on push; on pull requests, run only when at least one package
    # filter matched. The `needs.<job-id>` key must be the id of a job listed
    # in `needs`, otherwise the output resolves to an empty string and the
    # comparison with '[]' is always true.
    if: ${{ (github.event_name == 'push') || (needs.end-to-end-changes.outputs.modified-packages != '[]') }}
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Setup Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        id: setup-python
        with:
          # Quoted so that the version is never parsed as a float.
          python-version: '3.13'
          check-latest: true
          cache: pip

      - name: Restore python-venv
        uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        id: cache-python-venv
        with:
          path: venv/
          # TODO(review): timebox, try to factorize the three `hashFiles`
          # lines below.
          key: >-
            python-venv
            -ubuntu-latest
            -3.13
            -${{ hashFiles('ci/requirements/skore/python-3.13/scikit-learn-1.7/test-requirements.txt') }}
            -${{ hashFiles('ci/requirements/skore-hub-project/python-3.13/scikit-learn-1.7/test-requirements.txt') }}
            -${{ hashFiles('ci/requirements/skore-local-project/python-3.13/scikit-learn-1.7/test-requirements.txt') }}

      - name: Setup python-venv
        run: |
          set -eu

          # Ensure venv is created
          python -m venv venv

          # Activate venv for each step depending on the OS
          echo "${GITHUB_WORKSPACE}/venv/bin" >> ${GITHUB_PATH}
          echo "VIRTUAL_ENV=${GITHUB_WORKSPACE}/venv" >> ${GITHUB_ENV}

      - name: Install dependencies in python-venv
        if: steps.cache-python-venv.outputs.cache-hit != 'true'
        run: |
          python -m pip install --upgrade pip build

          echo "${PACKAGES}" | jq -r '.[]' | while read -r package; do
            python -m pip install --requirement "ci/requirements/${package}/python-3.13/scikit-learn-1.7/test-requirements.txt"
          done

      - name: Save python-venv
        uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        if: steps.cache-python-venv.outputs.cache-hit != 'true'
        with:
          path: venv/
          key: ${{ steps.cache-python-venv.outputs.cache-primary-key }}

      - name: Build and install
        # Each package is built in a subshell so that `cd` does not leak from
        # one iteration to the next.
        run: |
          echo "${PACKAGES}" | jq -r '.[]' | while read -r package; do
            (
              cd "${package}/"

              # build
              python -m build

              # install
              wheel=(dist/*.whl); python -m pip install --force-reinstall --no-deps "${wheel}"
            )
          done

      - name: Test
        timeout-minutes: 10
        working-directory: ci/tests/end-to-end
        run: python -m pytest --import-mode=importlib --no-header --verbosity=2 --dist=loadscope --numprocesses auto --no-cov
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,5 @@ jobs:
pre-commit run --all-files trailing-whitespace
pre-commit run --all-files typos
pre-commit run --all-files nbstripout
pre-commit run --all-files ruff
pre-commit run --all-files ruff-check
pre-commit run --all-files mypy
10 changes: 4 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/crate-ci/typos
rev: v1.30.2
rev: v1.33.1
hooks:
- id: typos

Expand All @@ -22,16 +22,14 @@ repos:
args: [--keep-id]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.10
rev: v0.11.13
hooks:
- id: ruff
files: ^((skore|skore-hub-project|skore-local-project)/(hatch|src|tests))|(examples)/
- id: ruff-check
args: [--fix]
- id: ruff-format
files: ^((skore|skore-hub-project|skore-local-project)/(hatch|src|tests))|(examples)/

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.15.0
rev: v1.16.1
hooks:
- id: mypy
files: ^(skore|skore-hub-project|skore-local-project)/src/
71 changes: 71 additions & 0 deletions ci/tests/end-to-end/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from pytest import fixture


def pytest_configure(config):
    """
    Select the ``agg`` matplotlib backend when pytest is configured.

    ``agg`` is a non-interactive backend that can only write to files.

    https://github.com/matplotlib/matplotlib/issues/29119
    https://matplotlib.org/stable/users/explain/figure/backends.html#selecting-a-backend
    """
    from matplotlib import use as select_backend

    select_backend("agg")


@fixture
def monkeypatch_tmpdir(monkeypatch, tmp_path):
    """
    Redirect ``tempfile.gettempdir()`` to the per-test ``tmp_path`` directory.

    The ``TMPDIR`` environment variable is pointed at ``tmp_path`` through
    ``monkeypatch``, then the ``tempfile`` module is reloaded so that the value
    cached by ``tempfile.gettempdir()`` is recomputed.

    https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir
    """
    import tempfile
    from importlib import reload

    temporary_directory = str(tmp_path)

    monkeypatch.setenv("TMPDIR", temporary_directory)
    reload(tempfile)


@fixture
def monkeypatch_skrub(monkeypatch):
    """
    Make `skrub.TableReport.html_snippet()` reproducible

    ``secrets.token_hex`` is replaced by a stub that always returns ``"<token>"``.
    The stub accepts the same optional ``nbytes`` argument as the real function, so
    that callers passing a size keep working instead of raising ``TypeError``.

    https://github.com/skrub-data/skrub/blob/35f573ce586fe61ef2c72f4c0c4b188ebf2e664b/skrub/_reporting/_html.py#L153
    """
    monkeypatch.setattr("secrets.token_hex", lambda nbytes=None: "<token>")


@fixture
def monkeypatch_matplotlib(monkeypatch):
    """
    Make `matplotlib.Figure.savefig(format="svg")` reproducible

    For the duration of the test, ``SOURCE_DATE_EPOCH`` is set to ``"0"`` and the
    ``svg.hashsalt`` rcParam (plus ``svg.id`` when available) is pinned to a
    constant value.

    The rcParams are overridden through ``matplotlib.rc_context``, which restores
    the global ``matplotlib.rcParams`` object in place on exit. Rebinding
    ``matplotlib.rcParams`` to a saved copy would instead leave the original object
    modified for every module that still holds a reference to it.

    https://matplotlib.org/stable/users/prev_whats_new/whats_new_2.1.0.html#reproducible-ps-pdf-and-svg-output
    https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.10.0.html#svg-id-rcparam
    """
    import matplotlib

    monkeypatch.setenv("SOURCE_DATE_EPOCH", "0")

    overrides = {"svg.hashsalt": "<hashsalt>"}

    # ``svg.id`` is not defined by every matplotlib version (see the 3.10.0 link
    # above); only override it when it is a known rcParam.
    if "svg.id" in matplotlib.rcParams:
        overrides["svg.id"] = "<id>"

    with matplotlib.rc_context(rc=overrides):
        yield


@fixture(autouse=True)
def setup(monkeypatch_tmpdir, monkeypatch_matplotlib, monkeypatch_skrub):
    """Request the three ``monkeypatch_*`` fixtures automatically for every test."""
101 changes: 101 additions & 0 deletions ci/tests/end-to-end/test_import.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from pytest import fixture
from sklearn.datasets import make_classification, make_regression, fetch_openml
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from skrub import tabular_learner
from sklearn.model_selection import GridSearchCV


@fixture(scope="module")
def regression():
    """
    Keyword arguments describing a regression experiment.

    Returns a dict with an unfitted ``LinearRegression`` and an 80/20 train/test
    split of a dataset generated by ``make_regression``.
    """
    features, target = make_regression(random_state=42)
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    return dict(
        estimator=LinearRegression(),
        X_train=features_train,
        y_train=target_train,
        X_test=features_test,
        y_test=target_test,
    )


@fixture(scope="module")
def classification():
    """
    Keyword arguments describing a binary classification experiment.

    Returns a dict with an unfitted ``LogisticRegression`` and an 80/20 train/test
    split of a two-class dataset generated by ``make_classification``.
    """
    features, target = make_classification(n_classes=2, random_state=42)
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    return dict(
        estimator=LogisticRegression(),
        X_train=features_train,
        y_train=target_train,
        X_test=features_test,
        y_test=target_test,
    )


@fixture(scope="module")
def gridsearch():
    """
    Keyword arguments describing a grid-search experiment.

    The OpenML ``adult`` dataset (version 2) is fetched as a dataframe, its target
    is encoded as ``1`` where it equals ``">50K"`` and ``0`` elsewhere, and the data
    is split with ``train_test_split``. The estimator is an unfitted
    ``GridSearchCV`` wrapping ``tabular_learner("classification")``.
    """
    features, income = fetch_openml("adult", version=2, as_frame=True, return_X_y=True)
    target = 1 * (income == ">50K")
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, random_state=42
    )

    param_grid = {
        "histgradientboostingclassifier__learning_rate": [0.01, 0.1, 0.2],
        "histgradientboostingclassifier__max_depth": [1, 3, 5],
        "histgradientboostingclassifier__max_leaf_nodes": [30, 60, 90],
    }
    search = GridSearchCV(
        estimator=tabular_learner("classification"),
        param_grid=param_grid,
        cv=5,
        n_jobs=-1,
        refit=True,
        scoring="neg_log_loss",
    )

    return dict(
        estimator=search,
        X_train=features_train,
        y_train=target_train,
        X_test=features_test,
        y_test=target_test,
    )


def test_put_with_local_project(tmp_path, regression, classification, gridsearch):
    """
    A ``skore.Project`` created with a workspace is backed by
    ``skore_local_project.Project`` and accepts ``EstimatorReport`` objects.
    """
    import skore
    import skore_local_project

    project = skore.Project("<name>", workspace=tmp_path)
    backend = project._Project__project

    assert isinstance(backend, skore_local_project.Project)
    assert project.mode == "local"
    assert project.name == "<name>"
    assert backend.workspace == tmp_path
    assert backend.name == "<name>"

    experiments = (
        ("regression", regression),
        ("classification", classification),
        ("gridsearch", gridsearch),
    )

    for key, experiment in experiments:
        project.put(key, skore.EstimatorReport(**experiment))


def test_simili_put_with_hub_project(regression, classification, gridsearch):
    """
    A ``skore.Project`` created with a ``hub://`` name is backed by
    ``skore_hub_project.Project``, and each ``EstimatorReport`` converts to an item
    exposing non-empty metadata, representation and parameters.
    """
    import skore
    import skore_hub_project

    project = skore.Project("hub://<tenant>/<name>")
    backend = project._Project__project

    assert isinstance(backend, skore_hub_project.Project)
    assert project.mode == "hub"
    assert project.name == "<name>"
    assert backend.tenant == "<tenant>"
    assert backend.name == "<name>"

    for experiment in (regression, classification, gridsearch):
        report = skore.EstimatorReport(**experiment)
        item = skore_hub_project.item.object_to_item(report)

        assert item.__metadata__
        assert item.__representation__
        assert item.__parameters__
2 changes: 1 addition & 1 deletion sphinx/sphinxext/matplotlib_skore_scraper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from sphinx_gallery.scrapers import matplotlib_scraper


class matplotlib_skore_scraper: # defining matplotlib scraper as a class not a function
class matplotlib_skore_scraper:
    """
    Callable wrapper around ``sphinx_gallery.scrapers.matplotlib_scraper``.

    The scraper is deliberately defined as a class rather than a function. It
    forwards every argument and defaults ``bbox_inches`` to ``"tight"`` when the
    caller does not provide it.
    """

    def __call__(self, *args, **kwargs):
        if "bbox_inches" not in kwargs:
            kwargs["bbox_inches"] = "tight"

        return matplotlib_scraper(*args, **kwargs)