Skip to content

Commit e97d6bf

Browse files
committed
fix how datetime and uuid objects are serialized
1 parent 3dea2d9 commit e97d6bf

2 files changed

Lines changed: 110 additions & 7 deletions

File tree

compose_runner/run.py

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
import json
55
import io
66
import pickle
7+
from datetime import date, datetime
78
from importlib import import_module
89
from pathlib import Path
10+
from uuid import UUID
911

1012
import requests
1113
import neurosynth_compose_sdk
@@ -227,7 +229,7 @@ def _get_entity_snapshot_record(self, entity_name, documents):
227229
# Old API format: list of {id, md5} snapshot summaries
228230
ref_document = document.get(ref_key)
229231
if isinstance(ref_document, dict):
230-
for summary_document in (ref_document.get(summary_key) or []):
232+
for summary_document in ref_document.get(summary_key) or []:
231233
snapshot_id = self._extract_document_id(summary_document)
232234
if snapshot_id is not None:
233235
break
@@ -239,9 +241,11 @@ def _get_entity_snapshot_record(self, entity_name, documents):
239241
id=snapshot_id
240242
).to_dict()
241243
else:
242-
snapshot_document = self.compose_api.snapshot_annotations_id_get(
243-
id=snapshot_id
244-
).to_dict()
244+
snapshot_document = (
245+
self.compose_api.snapshot_annotations_id_get(
246+
id=snapshot_id
247+
).to_dict()
248+
)
245249
except ComposeApiException:
246250
continue
247251
payload = self._unwrap_snapshot(snapshot_document)
@@ -357,12 +361,36 @@ def _apply_entity_records(self, records):
357361
self.existing_annotation_snapshot_id = records["annotation"]["snapshot_id"]
358362

359363
@staticmethod
360-
def _snapshot_md5(payload):
361-
serialized_payload = json.dumps(
364+
def _json_payload_default(value):
365+
if isinstance(value, (date, datetime)):
366+
return value.isoformat()
367+
if isinstance(value, UUID):
368+
return str(value)
369+
if isinstance(value, set):
370+
return sorted(value, key=str)
371+
to_dict = getattr(value, "to_dict", None)
372+
if callable(to_dict):
373+
return to_dict()
374+
raise TypeError(
375+
f"Object of type {value.__class__.__name__} is not JSON serializable"
376+
)
377+
378+
@classmethod
379+
def _snapshot_json(cls, payload):
380+
return json.dumps(
362381
payload,
382+
default=cls._json_payload_default,
363383
sort_keys=True,
364384
separators=(",", ":"),
365385
)
386+
387+
@classmethod
388+
def _json_safe_payload(cls, payload):
389+
return json.loads(cls._snapshot_json(payload))
390+
391+
@classmethod
392+
def _snapshot_md5(cls, payload):
393+
serialized_payload = cls._snapshot_json(payload)
366394
return hashlib.md5(serialized_payload.encode("utf-8")).hexdigest()
367395

368396
def _should_link_existing_snapshot(
@@ -587,7 +615,9 @@ def create_result_object(self):
587615
):
588616
kwargs[f"snapshot_{entity_name}_id"] = existing_id
589617
else:
590-
kwargs[f"snapshot_{entity_name}"] = live_payload
618+
kwargs[f"snapshot_{entity_name}"] = self._json_safe_payload(
619+
live_payload
620+
)
591621

592622
self._compose_config.api_key["upload_key"] = self.nsc_key
593623
result = self.compose_api.meta_analysis_results_post(

compose_runner/tests/test_run.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from datetime import date, datetime, timezone
2+
from uuid import UUID
3+
14
import pytest
25
from neurosynth_compose_sdk.exceptions import ApiException as ComposeApiException
36

@@ -27,6 +30,76 @@ def test_download_bundle():
2730
assert runner.cached_specification is not None
2831

2932

33+
def test_snapshot_md5_serializes_sdk_scalars_like_api_strings():
    """Native datetime/date/UUID values hash the same as their string forms."""
    timestamp = datetime(2023, 6, 19, 15, 29, 59, 132810, tzinfo=timezone.utc)
    timestamp_text = "2023-06-19T15:29:59.132810+00:00"
    uuid_text = "00000000-0000-0000-0000-000000000001"

    # Payload carrying native Python objects.
    live_payload = {
        "created_at": timestamp,
        "id": UUID(uuid_text),
        "studies": [
            {"created_at": timestamp, "publication_date": date(2023, 6, 19)},
        ],
    }
    # The same payload with every scalar already rendered as a string.
    api_payload = {
        "created_at": timestamp_text,
        "id": uuid_text,
        "studies": [
            {
                "created_at": timestamp_text,
                "publication_date": "2023-06-19",
            }
        ],
    }

    live_digest = Runner._snapshot_md5(live_payload)
    api_digest = Runner._snapshot_md5(api_payload)
    assert live_digest == api_digest
52+
53+
54+
def test_json_safe_payload_normalizes_datetimes_without_mutating_source():
    """``_json_safe_payload`` returns JSON-native data and leaves its input alone."""
    created_at = datetime(2023, 6, 19, 15, 29, 59, 132810, tzinfo=timezone.utc)
    payload = {"created_at": created_at, "tags": {"b", "a"}}

    normalized = Runner._json_safe_payload(payload)

    assert normalized == {
        "created_at": "2023-06-19T15:29:59.132810+00:00",
        "tags": ["a", "b"],
    }
    # The source payload must keep its original objects: the very same
    # datetime instance, and the tags still as an unmodified set.
    assert payload["created_at"] is created_at
    assert isinstance(payload["tags"], set)
    assert payload["tags"] == {"a", "b"}
65+
66+
67+
def test_create_result_object_normalizes_uploaded_snapshots():
    """Snapshots handed to the results POST hold strings, not datetime/UUID objects."""
    timestamp = datetime(2023, 6, 19, 15, 29, 59, 132810, tzinfo=timezone.utc)
    timestamp_text = "2023-06-19T15:29:59.132810+00:00"
    uuid_text = "00000000-0000-0000-0000-000000000001"
    captured = {}

    class FakeResult:
        id = "result-id"

    class FakeComposeApi:
        # Records the posted object so the test can inspect it afterwards.
        def meta_analysis_results_post(self, result_init):
            captured["result_init"] = result_init
            return FakeResult()

    runner = Runner(meta_analysis_id="meta-id", environment="production")
    runner.compose_api = FakeComposeApi()
    runner.cached_studyset = {
        "id": UUID(uuid_text),
        "created_at": timestamp,
        "studies": [],
    }
    runner.cached_annotation = {
        "created_at": timestamp,
        "notes": [],
    }

    runner.create_result_object()

    posted = captured["result_init"]
    expected_studyset = {
        "created_at": timestamp_text,
        "id": uuid_text,
        "studies": [],
    }
    expected_annotation = {
        "created_at": timestamp_text,
        "notes": [],
    }
    assert posted.snapshot_studyset == expected_studyset
    assert posted.snapshot_annotation == expected_annotation
    assert runner.result_id == "result-id"
101+
102+
30103
@pytest.mark.vcr
31104
def test_run_workflow():
32105
runner = Runner(

0 commit comments

Comments
 (0)