Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions skore-hub-project/src/skore_hub_project/artefact/__init__.py

This file was deleted.

8 changes: 8 additions & 0 deletions skore-hub-project/src/skore_hub_project/artifact/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Class definition of the payload used to upload and send an artifact to ``hub``."""

from .artifact import CrossValidationReportArtifact, EstimatorReportArtifact

__all__ = [
"EstimatorReportArtifact",
"CrossValidationReportArtifact",
]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Class definition of the payload used to upload and send an artefact to ``hub``."""
"""Class definition of the payload used to upload and send an artifact to ``hub``."""

from __future__ import annotations

Expand All @@ -9,26 +9,26 @@
from pydantic import BaseModel, ConfigDict, Field, computed_field

from skore_hub_project import Project
from skore_hub_project.artefact.upload import upload
from skore_hub_project.artifact.upload import upload
from skore_hub_project.protocol import CrossValidationReport, EstimatorReport


class Artefact(ABC, BaseModel):
"""Payload used to send the artefact of a report to ``hub``."""
class Artifact(ABC, BaseModel):
"""Payload used to send the artifact of a report to ``hub``."""

model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)

project: Project = Field(repr=False, exclude=True)
object: Any = Field(repr=False, exclude=True)


class EstimatorReportArtefact(Artefact):
class EstimatorReportArtifact(Artifact):
"""
Payload used to upload and send an estimator report artefact to ``hub``.
Payload used to upload and send an estimator report artifact to ``hub``.

Notes
-----
It uploads the report to artefacts storage in a lazy way.
It uploads the report to artifacts storage in a lazy way.

The report is uploaded without its cache, to avoid salting the checksum.
The report is primarily pickled on disk to reduce RAM footprint.
Expand All @@ -39,7 +39,7 @@ class EstimatorReportArtefact(Artefact):
@computed_field # type: ignore[prop-decorator]
@cached_property
def checksum(self) -> str:
"""Checksum, useful for retrieving the artefact from artefact storage."""
"""Checksum, useful for retrieving the artifact from artifact storage."""
cache = self.object._cache
self.object._cache = {}

Expand All @@ -49,13 +49,13 @@ def checksum(self) -> str:
self.object._cache = cache


class CrossValidationReportArtefact(Artefact):
class CrossValidationReportArtifact(Artifact):
"""
Payload used to upload and send a cross-validation report artefact to ``hub``.
Payload used to upload and send a cross-validation report artifact to ``hub``.

Notes
-----
It uploads the report to artefacts storage in a lazy way.
It uploads the report to artifacts storage in a lazy way.

The report is uploaded without its cache, to avoid salting the checksum.
The report is primarily pickled on disk to reduce RAM footprint.
Expand All @@ -66,7 +66,7 @@ class CrossValidationReportArtefact(Artefact):
@computed_field # type: ignore[prop-decorator]
@cached_property
def checksum(self) -> str:
"""Checksum, useful for retrieving the artefact from artefact storage."""
"""Checksum, useful for retrieving the artifact from artifact storage."""
reports = [self.object] + self.object.estimator_reports_
caches = []

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Function definition of the artefact ``upload``."""
"""Function definition of the artifact ``upload``."""

from __future__ import annotations

Expand Down Expand Up @@ -43,16 +43,16 @@ def upload_chunk(
length: int,
) -> str:
"""
Upload a chunk of the serialized object to the artefacts storage.
Upload a chunk of the serialized object to the artifacts storage.

Parameters
----------
filepath : ``Path``
The path of the file containing the serialized object.
client : ``httpx.Client``
The client used to upload the chunk to the artefacts storage.
The client used to upload the chunk to the artifacts storage.
url : str
The url used to upload the chunk to the artefacts storage.
The url used to upload the chunk to the artifacts storage.
offset : int
The start of the chunk in the file containing the serialized object.
length: int
Expand All @@ -61,7 +61,7 @@ def upload_chunk(
Returns
-------
etag : str
The ETag assigned by the artefacts storage to the chunk, used to acknowledge the
The ETag assigned by the artifacts storage to the chunk, used to acknowledge the
upload.

Notes
Expand All @@ -88,7 +88,7 @@ def upload_chunk(

def upload(project: Project, o: Any, type: str) -> str:
"""
Upload an object to the artefacts storage.
Upload an object to the artifacts storage.

Parameters
----------
Expand All @@ -97,12 +97,12 @@ def upload(project: Project, o: Any, type: str) -> str:
o : Any
The object to upload.
type : str
The type to associate to object in the artefacts storage.
The type to associate with the object in the artifacts storage.

Returns
-------
checksum : str
The checksum of the object after upload to the artefacts storage, based on its
The checksum of the object after upload to the artifacts storage, based on its
``joblib`` serialization.

Notes
Expand All @@ -117,7 +117,7 @@ def upload(project: Project, o: Any, type: str) -> str:
):
# Ask for upload urls.
response = hub_client.post(
url=f"projects/{project.tenant}/{project.name}/artefacts",
url=f"projects/{project.tenant}/{project.name}/artifacts",
json=[
{
"checksum": serializer.checksum,
Expand All @@ -127,12 +127,12 @@ def upload(project: Project, o: Any, type: str) -> str:
],
)

# An empty response means that an artefact with the same checksum already
# An empty response means that an artifact with the same checksum already
# exists. The object doesn't have to be re-uploaded.
if urls := response.json():
task_to_chunk_id = {}

# Upload each chunk of the serialized object to the artefacts storage, using
# Upload each chunk of the serialized object to the artifacts storage, using
# a disk temporary file.
#
# Each task is in charge of reading its own file chunk at runtime, to reduce
Expand Down Expand Up @@ -175,7 +175,7 @@ def upload(project: Project, o: Any, type: str) -> str:

# Acknowledge the upload, to let the hub/storage rebuild the whole object.
hub_client.post(
url=f"projects/{project.tenant}/{project.name}/artefacts/complete",
url=f"projects/{project.tenant}/{project.name}/artifacts/complete",
json=[
{
"checksum": serializer.checksum,
Expand Down
4 changes: 2 additions & 2 deletions skore-hub-project/src/skore_hub_project/project/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ def get(id: str) -> EstimatorReport:
# Ask for read url.
with HUBClient() as client:
response = client.get(
url=f"projects/{self.tenant}/{self.name}/artefacts/read",
params={"artefact_checksum": [checksum]},
url=f"projects/{self.tenant}/{self.name}/artifacts/read",
params={"artifact_checksum": [checksum]},
)

url = response.json()[0]["url"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sklearn.model_selection import BaseCrossValidator
from sklearn.model_selection._split import _CVIterableWrapper

from skore_hub_project.artefact import CrossValidationReportArtefact
from skore_hub_project.artifact import CrossValidationReportArtifact
from skore_hub_project.media import (
EstimatorHtmlRepr,
)
Expand Down Expand Up @@ -75,7 +75,7 @@ class CrossValidationReportPayload(ReportPayload):
report : CrossValidationReport
The report on which to calculate the payload to be sent.
upload : bool, optional
Upload the report to the artefacts storage, default True.
Upload the report to the artifacts storage, default True.
key : str
The key to associate to the report.
"""
Expand Down Expand Up @@ -231,20 +231,20 @@ def estimators(self) -> list[EstimatorReportPayload]:

@computed_field # type: ignore[prop-decorator]
@cached_property
def parameters(self) -> CrossValidationReportArtefact | dict[()]:
def parameters(self) -> CrossValidationReportArtifact | dict[()]:
"""
The checksum of the instance.

The checksum of the instance that was assigned after being uploaded to the
artefact storage. It is based on its ``joblib`` serialization and mainly used to
retrieve it from the artefacts storage.
artifact storage. It is based on its ``joblib`` serialization and mainly used to
retrieve it from the artifacts storage.

.. deprecated
The ``parameters`` property will be removed in favor of a new ``checksum``
property in the near future.
"""
if self.upload:
return CrossValidationReportArtefact(
return CrossValidationReportArtifact(
project=self.project,
report=self.report,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from pydantic import Field, computed_field

from skore_hub_project.artefact import EstimatorReportArtefact
from skore_hub_project.artifact import EstimatorReportArtifact
from skore_hub_project.media import (
Coefficients,
EstimatorHtmlRepr,
Expand Down Expand Up @@ -63,7 +63,7 @@ class EstimatorReportPayload(ReportPayload):
report : EstimatorReport
The report on which to calculate the payload to be sent.
upload : bool, optional
Upload the report to the artefacts storage, default True.
Upload the report to the artifacts storage, default True.
key : str
The key to associate to the report.
"""
Expand Down Expand Up @@ -116,18 +116,18 @@ class EstimatorReportPayload(ReportPayload):

@computed_field # type: ignore[prop-decorator]
@cached_property
def parameters(self) -> EstimatorReportArtefact | dict:
def parameters(self) -> EstimatorReportArtifact | dict:
"""
The checksum of the instance.

The checksum of the instance that was assigned after being uploaded to the
artefact storage. It is based on its ``joblib`` serialization and mainly used to
retrieve it from the artefacts storage.
artifact storage. It is based on its ``joblib`` serialization and mainly used to
retrieve it from the artifacts storage.

.. deprecated
The ``parameters`` property will be removed in favor of a new ``checksum``
property in the near future.
"""
if self.upload:
return EstimatorReportArtefact(project=self.project, report=self.report)
return EstimatorReportArtifact(project=self.project, report=self.report)
return {}
10 changes: 5 additions & 5 deletions skore-hub-project/src/skore_hub_project/report/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pydantic import BaseModel, ConfigDict, Field, computed_field

from skore_hub_project import Project
from skore_hub_project.artefact.artefact import Artefact
from skore_hub_project.artifact.artifact import Artifact
from skore_hub_project.media.media import Media
from skore_hub_project.metric.metric import Metric
from skore_hub_project.protocol import CrossValidationReport, EstimatorReport
Expand All @@ -28,7 +28,7 @@ class ReportPayload(ABC, BaseModel):
report : EstimatorReport | CrossValidationReport
The report on which to calculate the payload to be sent.
upload : bool, optional
Upload the report to the artefacts storage, default True.
Upload the report to the artifacts storage, default True.
key : str
The key to associate to the report.
"""
Expand Down Expand Up @@ -74,13 +74,13 @@ def ml_task(self) -> str:
@computed_field # type: ignore[prop-decorator]
@property
@abstractmethod
def parameters(self) -> Artefact | dict[()]:
def parameters(self) -> Artifact | dict[()]:
"""
The checksum of the instance.

The checksum of the instance that was assigned after being uploaded to the
artefact storage. It is based on its ``joblib`` serialization and mainly used to
retrieve it from the artefacts storage.
artifact storage. It is based on its ``joblib`` serialization and mainly used to
retrieve it from the artifacts storage.

.. deprecated
The ``parameters`` property will be removed in favor of a new ``checksum``
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from blake3 import blake3 as Blake3
from skore_hub_project import bytes_to_b64_str
from skore_hub_project.artefact.serializer import Serializer
from skore_hub_project.artifact.serializer import Serializer


class TestSerializer:
Expand Down
8 changes: 4 additions & 4 deletions skore-hub-project/tests/unit/project/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def monkeypatch_client(monkeypatch):
FakeClient,
)
monkeypatch.setattr(
"skore_hub_project.artefact.upload.HUBClient",
"skore_hub_project.artifact.upload.HUBClient",
FakeClient,
)

Expand Down Expand Up @@ -110,7 +110,7 @@ def test_put_estimator_report(self, monkeypatch, binary_classification, respx_mo
respx_mock.post("projects/<tenant>/<name>/runs").mock(
Response(200, json={"id": 0})
)
respx_mock.post("projects/<tenant>/<name>/artefacts").mock(
respx_mock.post("projects/<tenant>/<name>/artifacts").mock(
Response(200, json=[])
)
respx_mock.post("projects/<tenant>/<name>/estimator-reports").mock(
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_put_cross_validation_report(
respx_mock.post("projects/<tenant>/<name>/runs").mock(
Response(200, json={"id": 0})
)
respx_mock.post("projects/<tenant>/<name>/artefacts").mock(
respx_mock.post("projects/<tenant>/<name>/artifacts").mock(
Response(200, json=[])
)
respx_mock.post("projects/<tenant>/<name>/cross-validation-reports").mock(
Expand Down Expand Up @@ -182,7 +182,7 @@ def test_reports_get(self, respx_mock, regression):
response = Response(200, json={"raw": {"checksum": "<checksum>"}})
respx_mock.get(url).mock(response)

url = "projects/<tenant>/<name>/artefacts/read"
url = "projects/<tenant>/<name>/artifacts/read"
response = Response(200, json=[{"url": "http://url.com"}])
respx_mock.get(url).mock(response)

Expand Down
Loading
Loading