Skip to content

Commit f31d58a

Browse files
authored
[ENH] clone studyset (#1149)
* Codex implementation
* Fix style and missing import
1 parent 63ef26e commit f31d58a

File tree

4 files changed

+259
-1
lines changed

4 files changed

+259
-1
lines changed

store/backend/neurostore/resources/data.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import string
2+
from copy import deepcopy
23
from sqlalchemy import func, text
4+
from sqlalchemy.exc import SQLAlchemyError
35

46
from pgvector.sqlalchemy import Vector
57
from flask import request
@@ -87,6 +89,7 @@ class StudysetsView(ObjectView, ListView):
8789
_view_fields = {
8890
**LIST_CLONE_ARGS,
8991
**LIST_NESTED_ARGS,
92+
"copy_annotations": fields.Boolean(load_default=True),
9093
}
9194
# reorganize into o2m and m2o
9295
_o2m = {"studies": "StudiesView", "annotations": "AnnotationsView"}
@@ -213,6 +216,157 @@ def serialize_records(self, records, args):
213216
return content
214217
return super().serialize_records(records, args)
215218

219+
def post(self):
220+
args = parser.parse(self._user_args, request, location="query")
221+
copy_annotations = args.pop("copy_annotations", True)
222+
source_id = args.get("source_id")
223+
224+
if not source_id:
225+
return super().post()
226+
227+
source = args.get("source") or "neurostore"
228+
if source != "neurostore":
229+
field_err = make_field_error("source", source, valid_options=["neurostore"])
230+
abort_unprocessable(
231+
"invalid source, choose from: 'neurostore'", [field_err]
232+
)
233+
234+
unknown = self.__class__._schema.opts.unknown
235+
data = parser.parse(
236+
self.__class__._schema(exclude=("id",)), request, unknown=unknown
237+
)
238+
239+
clone_payload, source_record = self._build_clone_payload(source_id, data)
240+
241+
# ensure nested serialization when cloning
242+
args["nested"] = bool(args.get("nested") or request.args.get("source_id"))
243+
244+
with db.session.no_autoflush:
245+
record = self.__class__.update_or_create(clone_payload)
246+
247+
unique_ids = self.get_affected_ids([record.id])
248+
clear_cache(unique_ids)
249+
250+
db.session.flush()
251+
252+
self.update_base_studies(unique_ids.get("base-studies"))
253+
254+
try:
255+
if copy_annotations:
256+
self._clone_annotations(source_record, record)
257+
self.update_annotations(unique_ids.get("annotations"))
258+
except SQLAlchemyError as e:
259+
db.session.rollback()
260+
abort_validation(str(e))
261+
262+
response_context = dict(args)
263+
response = self.__class__._schema(context=response_context).dump(record)
264+
265+
db.session.commit()
266+
267+
return response
268+
269+
def _build_clone_payload(self, source_id, override_data):
270+
source_record = (
271+
Studyset.query.options(
272+
selectinload(Studyset.studies),
273+
selectinload(Studyset.annotations).options(
274+
selectinload(Annotation.annotation_analyses)
275+
),
276+
)
277+
.filter_by(id=source_id)
278+
.first()
279+
)
280+
281+
if source_record is None:
282+
abort_not_found(Studyset.__name__, source_id)
283+
284+
payload = {
285+
"name": source_record.name,
286+
"description": source_record.description,
287+
"publication": source_record.publication,
288+
"doi": source_record.doi,
289+
"pmid": source_record.pmid,
290+
"authors": source_record.authors,
291+
"metadata_": (
292+
deepcopy(source_record.metadata_)
293+
if source_record.metadata_ is not None
294+
else None
295+
),
296+
"public": source_record.public,
297+
"studies": [{"id": study.id} for study in source_record.studies],
298+
"source": "neurostore",
299+
"source_id": self._resolve_neurostore_origin(source_record),
300+
"source_updated_at": source_record.updated_at or source_record.created_at,
301+
}
302+
303+
if payload.get("metadata_") is None:
304+
payload.pop("metadata_", None)
305+
306+
if override_data:
307+
payload.update(override_data)
308+
309+
return payload, source_record
310+
311+
def _clone_annotations(self, source_record, cloned_record):
312+
if not source_record.annotations:
313+
return
314+
315+
owner_id = cloned_record.user_id
316+
317+
for annotation in source_record.annotations:
318+
clone_annotation = Annotation(
319+
name=annotation.name,
320+
description=annotation.description,
321+
source="neurostore",
322+
source_id=self._resolve_neurostore_origin(annotation),
323+
source_updated_at=annotation.updated_at or annotation.created_at,
324+
user_id=owner_id,
325+
metadata_=(
326+
deepcopy(annotation.metadata_) if annotation.metadata_ else None
327+
),
328+
public=annotation.public,
329+
note_keys=(
330+
deepcopy(annotation.note_keys) if annotation.note_keys else {}
331+
),
332+
)
333+
clone_annotation.studyset = cloned_record
334+
db.session.add(clone_annotation)
335+
db.session.flush()
336+
337+
analyses_to_create = []
338+
for aa in annotation.annotation_analyses:
339+
analyses_to_create.append(
340+
AnnotationAnalysis(
341+
annotation_id=clone_annotation.id,
342+
analysis_id=aa.analysis_id,
343+
note=deepcopy(aa.note) if aa.note else {},
344+
user_id=owner_id,
345+
study_id=aa.study_id,
346+
studyset_id=cloned_record.id,
347+
)
348+
)
349+
350+
if analyses_to_create:
351+
db.session.add_all(analyses_to_create)
352+
353+
@staticmethod
354+
def _resolve_neurostore_origin(record):
355+
source_id = record.id
356+
parent_source_id = record.source_id
357+
parent_source = getattr(record, "source", None)
358+
Model = type(record)
359+
360+
while parent_source_id is not None and parent_source == "neurostore":
361+
source_id = parent_source_id
362+
parent = Model.query.filter_by(id=parent_source_id).first()
363+
if parent is None:
364+
break
365+
parent_source_id = parent.source_id
366+
parent_source = getattr(parent, "source", None)
367+
368+
return source_id
369+
216370

217371
@view_maker
218372
class AnnotationsView(ObjectView, ListView):

store/backend/neurostore/schemas/data.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,9 @@ class StudysetSchema(BaseDataSchema):
569569
studies = StringOrNested(
570570
StudySchema, many=True
571571
) # This needs to be nested, but not cloned
572+
source = fields.String(dump_only=True, allow_none=True)
573+
source_id = fields.String(dump_only=True, allow_none=True)
574+
source_updated_at = fields.DateTime(dump_only=True, allow_none=True)
572575

573576
class Meta:
574577
additional = ("name", "description", "publication", "doi", "pmid")

store/backend/neurostore/tests/api/test_studysets.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,104 @@ def test_hot_swap_study_in_studyset(auth_client, ingest_neurosynth, session):
148148
== set(s for s in clone_ss_non_nested.json()["studies"])
149149
== set(s["id"] for s in clone_ss_nested.json()["studies"])
150150
)
151+
152+
153+
def _create_studyset_with_annotation(auth_client, study_ids, name="clone-source"):
    """Create a studyset holding ``study_ids`` plus one annotation.

    Returns a ``(studyset_json, annotations_list)`` tuple for use as a
    clone source in the tests below.
    """
    created = auth_client.post(
        "/api/studysets/",
        data={
            "name": name,
            "description": "clone me",
            "studies": study_ids,
        },
    )
    assert created.status_code == 200
    new_studyset_id = created.json()["id"]

    annotation_resp = auth_client.post(
        "/api/annotations/",
        data={
            "studyset": new_studyset_id,
            "note_keys": {"include": "boolean"},
            "name": "annotation for clone",
        },
    )
    assert annotation_resp.status_code == 200

    listed = auth_client.get(f"/api/annotations/?studyset_id={new_studyset_id}")
    assert listed.status_code == 200
    assert len(listed.json()["results"]) >= 1

    return created.json(), listed.json()["results"]
178+
179+
180+
def _study_ids_from_payload(studies):
    """Normalize a studies payload (dicts or bare id strings) to a list of ids."""
    return [
        entry.get("id") if isinstance(entry, dict) else entry
        for entry in studies
    ]
188+
189+
190+
def test_clone_studyset_copies_annotations_by_default(
    auth_client, ingest_neurosynth, session
):
    """Cloning via ``source_id`` copies studies AND annotations by default."""
    listing = auth_client.get("/api/studies/?page_size=2")
    study_ids = [row["id"] for row in listing.json()["results"]]

    source_studyset, source_annotations = _create_studyset_with_annotation(
        auth_client, study_ids
    )

    clone_resp = auth_client.post(
        f"/api/studysets/?source_id={source_studyset['id']}", data={}
    )
    assert clone_resp.status_code == 200
    clone_data = clone_resp.json()

    # Provenance fields point back at the source studyset.
    assert clone_data["source"] == "neurostore"
    assert clone_data["source_id"] == source_studyset["id"]

    # Same set of studies, owned by the cloning user.
    assert set(_study_ids_from_payload(clone_data["studies"])) == set(
        _study_ids_from_payload(source_studyset["studies"])
    )
    assert clone_data["user"] == auth_client.username

    # Every source annotation was copied onto the clone.
    cloned_annotations = auth_client.get(
        f"/api/annotations/?studyset_id={clone_data['id']}"
    )
    assert cloned_annotations.status_code == 200
    assert len(cloned_annotations.json()["results"]) == len(source_annotations)
219+
220+
221+
def test_clone_studyset_without_annotations_when_disabled(
    auth_client, ingest_neurosynth, session
):
    """``copy_annotations=false`` clones the studies but skips annotations."""
    listing = auth_client.get("/api/studies/?page_size=2")
    study_ids = [row["id"] for row in listing.json()["results"]]

    source_studyset, source_annotations = _create_studyset_with_annotation(
        auth_client, study_ids, name="clone-source-no-annots"
    )
    # Sanity check: the source really has annotations to skip.
    assert len(source_annotations) >= 1

    clone_resp = auth_client.post(
        f"/api/studysets/?source_id={source_studyset['id']}&copy_annotations=false",
        data={},
    )
    assert clone_resp.status_code == 200
    clone_data = clone_resp.json()

    # Studies and provenance are still copied as usual.
    assert set(_study_ids_from_payload(clone_data["studies"])) == set(
        _study_ids_from_payload(source_studyset["studies"])
    )
    assert clone_data["source"] == "neurostore"
    assert clone_data["source_id"] == source_studyset["id"]

    # But no annotations were carried over.
    cloned_annotations = auth_client.get(
        f"/api/annotations/?studyset_id={clone_data['id']}"
    )
    assert cloned_annotations.status_code == 200
    assert cloned_annotations.json()["results"] == []

0 commit comments

Comments
 (0)