probabl-ai · retromechs · Oct 14, 2025 · Oct 7, 2025 · Oct 7, 2025 · Oct 8, 2025
diff --git a/skore-hub-project/src/skore_hub_project/artefact/__init__.py b/skore-hub-project/src/skore_hub_project/artefact/__init__.py
diff --git a/skore-hub-project/src/skore_hub_project/artefact/artefact.py b/skore-hub-project/src/skore_hub_project/artefact/artefact.py
diff --git a/skore-hub-project/src/skore_hub_project/artifact/__init__.py b/skore-hub-project/src/skore_hub_project/artifact/__init__.py
@@ -0,0 +1 @@
+"""Class definition of the payload used to upload and send an artifact to ``hub``."""
diff --git a/skore-hub-project/src/skore_hub_project/artifact/artifact.py b/skore-hub-project/src/skore_hub_project/artifact/artifact.py
@@ -0,0 +1,75 @@
+"""Interface definition of the payload used to associate an artifact with a project."""
+
+from abc import ABC, abstractmethod
+from contextlib import AbstractContextManager, nullcontext
+from functools import cached_property
+
+from pydantic import BaseModel, ConfigDict, Field, computed_field
+
+from skore_hub_project import Project
+from skore_hub_project.artifact.upload import upload
+
+# Payload types accepted for upload; ``None`` means there is nothing to upload.
+Content = str | bytes | None
+
+
+class Artifact(BaseModel, ABC):
+    """
+    Abstract payload used to associate an artifact with a project.
+
+    Attributes
+    ----------
+    project : Project
+        The project the artifact's payload is associated with.
+    content_type : str
+        The content-type of the artifact content.
+
+    Notes
+    -----
+    The content of the artifact is uploaded lazily: the upload is only triggered the
+    first time ``checksum`` is read, and its result is cached afterwards. The content
+    is uploaded as a file to the ``hub`` artifacts storage.
+    """
+
+    model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)
+
+    project: Project = Field(repr=False, exclude=True)
+    content_type: str
+
+    @abstractmethod
+    def content_to_upload(self) -> Content | AbstractContextManager[Content]:
+        """
+        Content of the artifact to upload.
+
+        The content is either returned directly, or provided through a context
+        manager. A ``None`` content means that there is nothing to upload.
+
+        Example
+        -------
+        Return the content directly:
+
+            def content_to_upload(self) -> str:
+                return "<str>"
+
+        or yield the content, as a ``contextmanager`` would:
+
+            from contextlib import contextmanager
+
+            @contextmanager
+            def content_to_upload(self) -> Generator[str, None, None]:
+                yield "<str>"
+        """
+
+    @computed_field  # type: ignore[prop-decorator]
+    @cached_property
+    def checksum(self) -> str | None:
+        """Checksum used to identify the content of the artifact, if any."""
+        provided = self.content_to_upload()
+
+        # Plain content is wrapped, so that both ways of providing the content share
+        # the same ``with`` statement below.
+        manager = (
+            provided
+            if isinstance(provided, AbstractContextManager)
+            else nullcontext(provided)
+        )
+
+        digest: str | None = None
+
+        with manager as content:
+            if content is not None:
+                digest = upload(
+                    project=self.project,
+                    content=content,
+                    content_type=self.content_type,
+                )
+
+        return digest
diff --git a/...t/src/skore_hub_project/media/__init__.py → ...re_hub_project/artifact/media/__init__.py b/...t/src/skore_hub_project/media/__init__.py → ...re_hub_project/artifact/media/__init__.py
@@ -1,7 +1,5 @@
 """Class definitions of the payloads used to send a media to ``hub``."""
 
-from __future__ import annotations
-
 from .data import TableReportTest, TableReportTrain
 from .feature_importance import (
     Coefficients,

diff --git a/skore-hub-project/src/skore_hub_project/artifact/media/data.py b/skore-hub-project/src/skore_hub_project/artifact/media/data.py
@@ -0,0 +1,51 @@
+"""Definition of the payload used to associate a data category media with report."""
+
+from typing import Literal
+
+from pydantic import Field
+
+from skore_hub_project import switch_mpl_backend
+from skore_hub_project.artifact.media.media import Media
+from skore_hub_project.protocol import EstimatorReport
+
+
+class TableReport(Media):  # noqa: D101
+    # Media holding the JSON-serialized summary of the dataset of a report. The
+    # ``data_source`` attribute selects the dataset to analyze; ``None`` lets the
+    # report use its own default.
+    name: Literal["table_report"] = "table_report"
+    content_type: Literal["application/vnd.skrub.table-report.v1+json"] = (
+        "application/vnd.skrub.table-report.v1+json"
+    )
+
+    def content_to_upload(self) -> bytes:  # noqa: D102
+        # Return the summary of ``report.data.analyze()`` serialized to JSON bytes,
+        # with the full dataset replaced by an extract of its first and last rows.
+        import orjson
+
+        with switch_mpl_backend():
+            display = (
+                self.report.data.analyze()
+                if self.data_source is None
+                else self.report.data.analyze(data_source=self.data_source)
+            )
+
+        # Work on a shallow copy: keys are added and removed below, and the summary
+        # is owned by the display. Mutating it in place would break any later use of
+        # the same display (e.g. ``KeyError`` on "dataframe" on a second call).
+        table_report = dict(display.summary)
+
+        # Replace full dataset by its head/tail
+        dataframe = table_report.pop("dataframe")
+        table_report["extract_head"] = dataframe.head(3).to_dict(orient="split")
+        table_report["extract_tail"] = dataframe.tail(3).to_dict(orient="split")
+
+        # Remove irrelevant information
+        del table_report["sample_table"]
+
+        return orjson.dumps(
+            table_report,
+            option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY),
+        )
+
+
+# Table report of an ``EstimatorReport``, analyzed with ``data_source="train"``.
+class TableReportTrain(TableReport):  # noqa: D101
+    report: EstimatorReport = Field(repr=False, exclude=True)
+    data_source: Literal["train"] = "train"
+
+
+# Table report of an ``EstimatorReport``, analyzed with ``data_source="test"``.
+class TableReportTest(TableReport):  # noqa: D101
+    report: EstimatorReport = Field(repr=False, exclude=True)
+    data_source: Literal["test"] = "test"
diff --git a/skore-hub-project/src/skore_hub_project/artifact/media/feature_importance.py b/skore-hub-project/src/skore_hub_project/artifact/media/feature_importance.py
@@ -0,0 +1,67 @@
+"""Definition of the payload used to associate feature importance media with report."""
+
+from abc import ABC
+from collections.abc import Callable
+from functools import reduce
+from typing import ClassVar, Literal, cast
+
+from pandas import DataFrame
+from pydantic import Field
+
+from skore_hub_project.artifact.media.media import Media
+from skore_hub_project.protocol import EstimatorReport
+
+
+class FeatureImportance(Media, ABC):  # noqa: D101
+    # Base of the feature importance medias. Subclasses set ``accessor`` to the
+    # dotted path, relative to the report, of the callable computing the importance.
+    accessor: ClassVar[str]
+    content_type: Literal["application/vnd.dataframe"] = "application/vnd.dataframe"
+
+    def content_to_upload(self) -> bytes | None:  # noqa: D102
+        # Return the feature importance dataframe serialized to JSON bytes, or None
+        # when the accessor can't be resolved on the report.
+        import orjson
+
+        try:
+            # Walk the dotted path attribute by attribute, starting from the report.
+            resolved = reduce(getattr, self.accessor.split("."), self.report)
+        except AttributeError:
+            return None
+
+        compute = cast(Callable, resolved)
+
+        # ``data_source`` is only forwarded when the media defines one.
+        kwargs = {} if self.data_source is None else {"data_source": self.data_source}
+        frame = compute(**kwargs)
+
+        if not isinstance(frame, DataFrame):
+            # Anything but a dataframe must expose its dataframe through ``frame()``.
+            frame = frame.frame()
+
+        serializable = frame.fillna("NaN").to_dict(orient="tight")
+        options = orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY
+
+        return orjson.dumps(serializable, option=options)
+
+
+# Base of the permutation importance medias: restricts ``report`` to an
+# ``EstimatorReport`` and resolves ``report.feature_importance.permutation``.
+class Permutation(FeatureImportance, ABC):  # noqa: D101
+    report: EstimatorReport = Field(repr=False, exclude=True)
+    accessor: ClassVar[str] = "feature_importance.permutation"
+    name: Literal["permutation"] = "permutation"
+
+
+# Permutation importance with ``data_source`` pinned to "train".
+class PermutationTrain(Permutation):  # noqa: D101
+    data_source: Literal["train"] = "train"
+
+
+# Permutation importance with ``data_source`` pinned to "test".
+class PermutationTest(Permutation):  # noqa: D101
+    data_source: Literal["test"] = "test"
+
+
+# Mean decrease impurity media: restricts ``report`` to an ``EstimatorReport`` and
+# resolves ``report.feature_importance.mean_decrease_impurity``. ``data_source`` is
+# not overridden here, so it keeps the default declared on ``Media``.
+class MeanDecreaseImpurity(FeatureImportance):  # noqa: D101
+    report: EstimatorReport = Field(repr=False, exclude=True)
+    accessor: ClassVar[str] = "feature_importance.mean_decrease_impurity"
+    name: Literal["mean_decrease_impurity"] = "mean_decrease_impurity"
+
+
+# Coefficients media: resolves ``report.feature_importance.coefficients``. Neither
+# ``report`` nor ``data_source`` is overridden here, so both keep the declarations
+# inherited from ``Media``.
+class Coefficients(FeatureImportance):  # noqa: D101
+    accessor: ClassVar[str] = "feature_importance.coefficients"
+    name: Literal["coefficients"] = "coefficients"
diff --git a/skore-hub-project/src/skore_hub_project/artifact/media/media.py b/skore-hub-project/src/skore_hub_project/artifact/media/media.py
@@ -0,0 +1,29 @@
+"""Class definition of the payload used to associate a media with the report."""
+
+from pydantic import Field
+
+from skore_hub_project.artifact.artifact import Artifact
+from skore_hub_project.protocol import CrossValidationReport, EstimatorReport
+
+
+class Media(Artifact):
+    """
+    Payload used to associate a media with the report.
+
+    Attributes
+    ----------
+    project : Project
+        The project to which the artifact's payload must be associated.
+    content_type : str
+        The content-type of the artifact content.
+    report : EstimatorReport | CrossValidationReport
+        The report from which the media is generated.
+    name : str
+        The name of the media.
+    data_source : str | None
+        The source of the data used to generate the media.
+    """
+
+    report: EstimatorReport | CrossValidationReport = Field(repr=False, exclude=True)
+    name: str
+    data_source: str | None = None
diff --git a/skore-hub-project/src/skore_hub_project/artifact/media/model.py b/skore-hub-project/src/skore_hub_project/artifact/media/model.py
@@ -0,0 +1,15 @@
+"""Definition of the payload used to associate a model category media with report."""
+
+from typing import Literal
+
+from skore_hub_project.artifact.media.media import Media
+
+
+class EstimatorHtmlRepr(Media):  # noqa: D101
+    # Media holding the HTML representation of the estimator of the report.
+    name: Literal["estimator_html_repr"] = "estimator_html_repr"
+    content_type: Literal["text/html"] = "text/html"
+
+    def content_to_upload(self) -> str:  # noqa: D102
+        # sklearn is imported at call time rather than at module import.
+        from sklearn.utils import estimator_html_repr
+
+        estimator = self.report.estimator
+
+        return estimator_html_repr(estimator)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Class definition of the payload used to upload and send an artifact to ``hub``."""