diff --git a/skore-local-project/src/skore_local_project/project.py b/skore-local-project/src/skore_local_project/project.py index cd2b707270..ef4657a116 100644 --- a/skore-local-project/src/skore_local_project/project.py +++ b/skore-local-project/src/skore_local_project/project.py @@ -170,7 +170,9 @@ def pickle(report: EstimatorReport | CrossValidationReport) -> tuple[str, bytes] The report is pickled without its cache, to avoid salting the hash. """ reports = [report] + getattr(report, "estimator_reports_", []) - caches = [report_to_clear.__dict__.pop("_cache") for report_to_clear in reports] + caches = [report_to_clear._cache for report_to_clear in reports] + + report.clear_cache() try: with io.BytesIO() as stream: diff --git a/skore-local-project/tests/unit/test_project.py b/skore-local-project/tests/unit/test_project.py index 918422172f..2e9d6f1b6f 100644 --- a/skore-local-project/tests/unit/test_project.py +++ b/skore-local-project/tests/unit/test_project.py @@ -126,36 +126,56 @@ def test_init(self, monkeypatch, tmp_path): def test_pickle_estimator_report(self, regression): # Pickle the report once, without any value in the cache assert not regression._cache - pickle_1 = Project.pickle(regression) + hash1, pickle1 = Project.pickle(regression) assert not regression._cache # Pickle the same report, but with values in the cache regression.cache_predictions() assert regression._cache - pickle_2 = Project.pickle(regression) + hash2, pickle2 = Project.pickle(regression) assert regression._cache # Make sure that the two pickles on the report are not affected by the cache - assert pickle_1 == pickle_2 + assert (hash1, pickle1) == (hash2, pickle2) + + # Make sure that pickles are not broken + with BytesIO(pickle1) as stream: + report1 = joblib.load(stream) + + with BytesIO(pickle2) as stream: + report2 = joblib.load(stream) + + report1.cache_predictions() + report2.cache_predictions() def test_pickle_cross_validation_report(self, cv_regression): reports = [cv_regression] + cv_regression.estimator_reports_ # Pickle the report once, without any value in the cache assert not any(report._cache for report in reports) - pickle_1 = Project.pickle(cv_regression) + hash1, pickle1 = Project.pickle(cv_regression) assert not any(report._cache for report in reports) # Pickle the same report, but with values in the cache cv_regression.cache_predictions() assert any(report._cache for report in reports) - pickle_2 = Project.pickle(cv_regression) + hash2, pickle2 = Project.pickle(cv_regression) assert any(report._cache for report in reports) # Make sure that the two pickles on the report are not affected by the cache - assert pickle_1 == pickle_2 + assert (hash1, pickle1) == (hash2, pickle2) + + # Make sure that pickles are not broken + with BytesIO(pickle1) as stream: + report1 = joblib.load(stream) + + with BytesIO(pickle2) as stream: + report2 = joblib.load(stream) + + report1.cache_predictions() + report2.cache_predictions() def test_init_with_envar(self, monkeypatch, tmp_path): monkeypatch.setenv("SKORE_WORKSPACE", str(tmp_path))