Skip to content
Merged
8 changes: 0 additions & 8 deletions skore-hub-project/src/skore_hub_project/artefact/__init__.py

This file was deleted.

81 changes: 0 additions & 81 deletions skore-hub-project/src/skore_hub_project/artefact/artefact.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Class definition of the payload used to upload and send an artifact to ``hub``."""
75 changes: 75 additions & 0 deletions skore-hub-project/src/skore_hub_project/artifact/artifact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Interface definition of the payload used to associate an artifact with a project."""

from abc import ABC, abstractmethod
from contextlib import AbstractContextManager, nullcontext
from functools import cached_property

from pydantic import BaseModel, ConfigDict, Field, computed_field

from skore_hub_project import Project
from skore_hub_project.artifact.upload import upload

# Raw payload an artifact can provide for upload: text, bytes, or ``None`` when
# there is nothing to upload (``Artifact.checksum`` then skips the upload).
Content = str | bytes | None


class Artifact(BaseModel, ABC):
    """
    Interface definition of the payload used to associate an artifact with a project.

    Attributes
    ----------
    project : Project
        The project to which the artifact's payload must be associated.
    content_type : str
        The content-type of the artifact content.

    Notes
    -----
    The content of the artifact is uploaded lazily: the upload is triggered the first
    time ``checksum`` is accessed, and the resulting value is cached afterwards. The
    content is uploaded as a file to the ``hub`` artifacts storage.
    """

    model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)

    project: Project = Field(repr=False, exclude=True)
    content_type: str

    @abstractmethod
    def content_to_upload(self) -> Content | AbstractContextManager[Content]:
        """
        Content of the artifact to upload.

        Returns
        -------
        Content | AbstractContextManager[Content]
            Either the content itself, or a context manager producing the content on
            entry. A ``None`` content means that there is nothing to upload.

        Example
        -------
        You can implement this ``abstractmethod`` to return directly the content:

            def content_to_upload(self) -> str:
                return "<str>"

        or to yield the content, as a ``contextmanager`` would:

            from contextlib import contextmanager

            @contextmanager
            def content_to_upload(self) -> Generator[str, None, None]:
                yield "<str>"
        """

    @computed_field  # type: ignore[prop-decorator]
    @cached_property
    def checksum(self) -> str | None:
        """
        Checksum used to identify the content of the artifact.

        Accessing it for the first time uploads the content through ``upload`` and
        returns the value ``upload`` produced; ``None`` is returned, without any
        upload, when there is no content.
        """
        provided = self.content_to_upload()

        # Normalize both supported return styles to a context manager, so that a
        # plain value and a managed value are consumed the same way.
        manager = (
            provided
            if isinstance(provided, AbstractContextManager)
            else nullcontext(provided)
        )

        digest: str | None = None

        with manager as content:
            if content is not None:
                digest = upload(
                    project=self.project,
                    content=content,
                    content_type=self.content_type,
                )

        return digest
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Class definitions of the payloads used to send a media to ``hub``."""

from __future__ import annotations

from .data import TableReportTest, TableReportTrain
from .feature_importance import (
Coefficients,
Expand Down
51 changes: 51 additions & 0 deletions skore-hub-project/src/skore_hub_project/artifact/media/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Definition of the payload used to associate a data category media with report."""

from typing import Literal

from pydantic import Field

from skore_hub_project import switch_mpl_backend
from skore_hub_project.artifact.media.media import Media
from skore_hub_project.protocol import EstimatorReport


class TableReport(Media):  # noqa: D101
    # Media holding the JSON-serialized summary of the table report obtained from
    # ``report.data.analyze``.
    name: Literal["table_report"] = "table_report"
    content_type: Literal["application/vnd.skrub.table-report.v1+json"] = (
        "application/vnd.skrub.table-report.v1+json"
    )

    def content_to_upload(self) -> bytes:
        """
        Serialize the summary of the table report to JSON.

        The full dataset held by the summary is replaced by two extracts (its first
        and last three rows), and the ``sample_table`` entry is dropped.

        Returns
        -------
        bytes
            The JSON document, as produced by ``orjson.dumps``.
        """
        import orjson

        # NOTE(review): ``switch_mpl_backend`` presumably changes the matplotlib
        # backend while the report is analyzed -- confirm against its definition.
        with switch_mpl_backend():
            display = (
                self.report.data.analyze()
                if self.data_source is None
                else self.report.data.analyze(data_source=self.data_source)
            )

            # Work on a shallow copy: the summary belongs to the display returned by
            # the report, which may be handed out again. Editing it in place would
            # strip entries for any later consumer, including a second upload.
            table_report = dict(display.summary)

            # Replace full dataset by its head/tail
            dataframe = table_report.pop("dataframe")
            table_report["extract_head"] = dataframe.head(3).to_dict(orient="split")
            table_report["extract_tail"] = dataframe.tail(3).to_dict(orient="split")

            # Remove irrelevant information
            del table_report["sample_table"]

        return orjson.dumps(
            table_report,
            option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY),
        )


class TableReportTrain(TableReport):  # noqa: D101
    # Table report media bound to the ``"train"`` data source; the accepted report
    # is narrowed to an ``EstimatorReport``.
    report: EstimatorReport = Field(repr=False, exclude=True)
    data_source: Literal["train"] = "train"


class TableReportTest(TableReport):  # noqa: D101
    # Table report media bound to the ``"test"`` data source; the accepted report
    # is narrowed to an ``EstimatorReport``.
    report: EstimatorReport = Field(repr=False, exclude=True)
    data_source: Literal["test"] = "test"
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Definition of the payload used to associate feature importance media with report."""

from abc import ABC
from collections.abc import Callable
from functools import reduce
from typing import ClassVar, Literal, cast

from pandas import DataFrame
from pydantic import Field

from skore_hub_project.artifact.media.media import Media
from skore_hub_project.protocol import EstimatorReport


class FeatureImportance(Media, ABC):  # noqa: D101
    # Base of the feature importance medias. Subclasses set ``accessor`` to the
    # dotted path, relative to the report, of the callable computing the result.
    accessor: ClassVar[str]
    content_type: Literal["application/vnd.dataframe"] = "application/vnd.dataframe"

    def content_to_upload(self) -> bytes | None:
        """
        Serialize the feature importance designated by ``accessor`` to JSON.

        Returns
        -------
        bytes | None
            The JSON document produced by ``orjson.dumps``, or ``None`` when the
            accessor cannot be resolved on the report.
        """
        import orjson

        # Walk the dotted path attribute by attribute, starting from the report. A
        # missing attribute means that there is nothing to upload for this report.
        try:
            resolved = reduce(getattr, self.accessor.split("."), self.report)
        except AttributeError:
            return None

        compute = cast(Callable, resolved)

        if self.data_source is None:
            result = compute()
        else:
            result = compute(data_source=self.data_source)

        # Anything that is not already a dataframe is expected to expose ``frame()``.
        frame = result if isinstance(result, DataFrame) else result.frame()
        serializable = frame.fillna("NaN").to_dict(orient="tight")

        return orjson.dumps(
            serializable,
            option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY),
        )


class Permutation(FeatureImportance, ABC):  # noqa: D101
    # Base of the permutation importance medias, resolved through the
    # ``feature_importance.permutation`` accessor of an ``EstimatorReport``.
    # Concrete subclasses pin the data source.
    report: EstimatorReport = Field(repr=False, exclude=True)
    accessor: ClassVar[str] = "feature_importance.permutation"
    name: Literal["permutation"] = "permutation"


class PermutationTrain(Permutation):  # noqa: D101
    # Permutation importance media bound to the ``"train"`` data source.
    data_source: Literal["train"] = "train"


class PermutationTest(Permutation):  # noqa: D101
    # Permutation importance media bound to the ``"test"`` data source.
    data_source: Literal["test"] = "test"


class MeanDecreaseImpurity(FeatureImportance):  # noqa: D101
    # Feature importance media resolved through the
    # ``feature_importance.mean_decrease_impurity`` accessor of an
    # ``EstimatorReport``. No data source is pinned, so the inherited default applies.
    report: EstimatorReport = Field(repr=False, exclude=True)
    accessor: ClassVar[str] = "feature_importance.mean_decrease_impurity"
    name: Literal["mean_decrease_impurity"] = "mean_decrease_impurity"


class Coefficients(FeatureImportance):  # noqa: D101
    # Feature importance media resolved through the
    # ``feature_importance.coefficients`` accessor. Unlike its siblings, it does not
    # narrow the ``report`` field and therefore keeps the type declared by ``Media``.
    accessor: ClassVar[str] = "feature_importance.coefficients"
    name: Literal["coefficients"] = "coefficients"
29 changes: 29 additions & 0 deletions skore-hub-project/src/skore_hub_project/artifact/media/media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Class definition of the payload used to associate a media with the report."""

from pydantic import Field

from skore_hub_project.artifact.artifact import Artifact
from skore_hub_project.protocol import CrossValidationReport, EstimatorReport


class Media(Artifact):
    """
    Payload used to associate a media with the report.

    Attributes
    ----------
    project : Project
        The project to which the artifact's payload must be associated.
    content_type : str
        The content-type of the artifact content.
    report : EstimatorReport | CrossValidationReport
        The report with which the media is associated.
    name : str
        The name of the media.
    data_source : str | None
        The source of the data used to generate the media.
    """

    report: EstimatorReport | CrossValidationReport = Field(repr=False, exclude=True)
    name: str
    data_source: str | None = None
15 changes: 15 additions & 0 deletions skore-hub-project/src/skore_hub_project/artifact/media/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Definition of the payload used to associate a model category media with report."""

from typing import Literal

from skore_hub_project.artifact.media.media import Media


class EstimatorHtmlRepr(Media):  # noqa: D101
    # Media holding the HTML representation of the estimator of the report.
    name: Literal["estimator_html_repr"] = "estimator_html_repr"
    content_type: Literal["text/html"] = "text/html"

    def content_to_upload(self) -> str:
        """Return the HTML representation of the estimator of the report."""
        # Imported lazily, as in the rest of the package, to keep module import cheap.
        from sklearn.utils import estimator_html_repr

        estimator = self.report.estimator

        return estimator_html_repr(estimator)
Loading