Skip to content

Commit b9da2ac

Browse files
committed
change to new structure
1 parent 359f6ea commit b9da2ac

19 files changed

Lines changed: 1097850 additions & 296975 deletions

compose_runner/aws_lambda/run_handler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ def _log(job_id: str, message: str, **details: Any) -> None:
3636
def _compose_api_base_url(environment: str) -> str:
3737
env = (environment or "production").lower()
3838
if env == "staging":
39-
return "https://synth.neurostore.xyz/api"
39+
return "https://staging.synth.neurostore.xyz/api"
40+
if env == "development":
41+
return "https://dev.synth.neurostore.xyz/api"
4042
if env == "local":
4143
return "http://localhost:81/api"
4244
return "https://compose.neurosynth.org/api"

compose_runner/run.py

Lines changed: 64 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -29,26 +29,29 @@ class Runner:
2929

3030
_TARGET_SPACE = "mni152_2mm"
3131

32-
_ENTITY_SNAPSHOT_KEYS = {
33-
"studyset": ("studyset_snapshot", "studyset", "cached_studyset"),
34-
"annotation": ("annotation_snapshot", "annotation", "cached_annotation"),
32+
_ENTITY_SNAPSHOT_ID_KEYS = {
33+
"studyset": ("snapshot_studyset_id",),
34+
"annotation": ("snapshot_annotation_id",),
3535
}
3636
_ENTITY_STORE_PATHS = {
3737
"studyset": "studysets",
3838
"annotation": "annotations",
3939
}
4040
_ENTITY_SNAPSHOT_PATHS = {
41-
"studyset": "studysets",
42-
"annotation": "annotations",
41+
"studyset": "snapshot-studysets",
42+
"annotation": "snapshot-annotations",
4343
}
4444
_ENTITY_NEUROSTORE_KEYS = {
45-
"studyset": ("neurostore_studyset", "neurostore_studyset_id", "studyset"),
45+
"studyset": ("neurostore_studyset", "neurostore_studyset_id"),
4646
"annotation": (
4747
"neurostore_annotation",
4848
"neurostore_annotation_id",
49-
"annotation",
5049
),
5150
}
51+
_ENTITY_SNAPSHOT_SUMMARY_KEYS = {
52+
"studyset": ("neurostore_studyset", "studysets"),
53+
"annotation": ("neurostore_annotation", "annotations"),
54+
}
5255
_ENTITY_COMPOSE_PATHS = {
5356
"studyset": "neurostore-studysets",
5457
"annotation": "neurostore-annotations",
@@ -79,8 +82,8 @@ def __init__(
7982
}
8083
elif environment == "staging":
8184
# staging
82-
self.compose_url = "https://synth.neurostore.xyz/api"
83-
self.store_url = "https://neurostore.xyz/api"
85+
self.compose_url = "https://staging.synth.neurostore.xyz/api"
86+
self.store_url = "https://staging.neurostore.xyz/api"
8487
self.reference_studysets = {
8588
"neurosynth": gen_database_url("staging", "neurosynth"),
8689
"neuroquery": gen_database_url("staging", "neuroquery"),
@@ -206,7 +209,7 @@ def _get_result_documents(self, meta_analysis):
206209
result_id = result_ref
207210
result_doc = None
208211
elif isinstance(result_ref, dict):
209-
result_id = result_ref.get("id")
212+
result_id = result_ref.get("id") or result_ref.get("result_id")
210213
result_doc = result_ref
211214
else:
212215
continue
@@ -215,14 +218,7 @@ def _get_result_documents(self, meta_analysis):
215218
continue
216219
if result_id is not None:
217220
seen_ids.add(result_id)
218-
219-
has_snapshot_payload = any(
220-
isinstance(result_doc.get(key), dict)
221-
for key in self._ENTITY_SNAPSHOT_KEYS["studyset"]
222-
+ self._ENTITY_SNAPSHOT_KEYS["annotation"]
223-
) if isinstance(result_doc, dict) else False
224-
225-
if result_doc is None or (result_id is not None and not has_snapshot_payload):
221+
if result_doc is None:
226222
if result_id is None:
227223
continue
228224
result_doc = self._get_json(
@@ -254,24 +250,33 @@ def _get_entity_snapshot_record(self, entity_name, documents):
254250
for document in documents:
255251
if not isinstance(document, dict):
256252
continue
257-
for key in self._ENTITY_SNAPSHOT_KEYS[entity_name]:
258-
snapshot_document = document.get(key)
259-
payload = self._unwrap_snapshot(snapshot_document)
260-
if is_expected_snapshot(payload):
261-
return payload, self._extract_document_id(snapshot_document)
262-
snapshot_id = self._extract_document_id(snapshot_document)
253+
snapshot_id = None
254+
for key in self._ENTITY_SNAPSHOT_ID_KEYS[entity_name]:
255+
snapshot_id = self._extract_document_id(document.get(key))
263256
if snapshot_id is None:
264257
continue
265-
try:
266-
snapshot_document = self._get_json(
267-
f"{self.compose_url}/{self._ENTITY_SNAPSHOT_PATHS[entity_name]}/{snapshot_id}",
268-
f"Could not download {entity_name} snapshot {snapshot_id}",
269-
)
270-
except requests.exceptions.HTTPError:
271-
continue
272-
payload = self._unwrap_snapshot(snapshot_document)
273-
if is_expected_snapshot(payload):
274-
return payload, self._extract_document_id(snapshot_document) or snapshot_id
258+
break
259+
if snapshot_id is None:
260+
ref_key, summary_key = self._ENTITY_SNAPSHOT_SUMMARY_KEYS[entity_name]
261+
ref_document = document.get(ref_key)
262+
if isinstance(ref_document, dict):
263+
summary_documents = ref_document.get(summary_key) or []
264+
for summary_document in summary_documents:
265+
snapshot_id = self._extract_document_id(summary_document)
266+
if snapshot_id is not None:
267+
break
268+
if snapshot_id is None:
269+
continue
270+
try:
271+
snapshot_document = self._get_json(
272+
f"{self.compose_url}/{self._ENTITY_SNAPSHOT_PATHS[entity_name]}/{snapshot_id}",
273+
f"Could not download {entity_name} snapshot {snapshot_id}",
274+
)
275+
except requests.exceptions.HTTPError:
276+
continue
277+
payload = self._unwrap_snapshot(snapshot_document)
278+
if is_expected_snapshot(payload):
279+
return payload, snapshot_id
275280
return None, None
276281

277282
@staticmethod
@@ -505,17 +510,27 @@ def apply_filter(self, studyset, annotation):
505510
raise ValueError("Cannot have multiple conditions and a database studyset.")
506511

507512
elif len(conditions) == 2 and not database_studyset:
508-
second_analysis_ids = [
509-
n.analysis.id
510-
for n in annotation.notes
511-
if n.note.get(f"{column}") == weight_conditions[-1]
512-
]
513+
if column_type == "boolean":
514+
second_analysis_ids = [
515+
n.analysis.id
516+
for n in annotation.notes
517+
if not n.note.get(f"{column}")
518+
]
519+
else:
520+
second_analysis_ids = [
521+
n.analysis.id
522+
for n in annotation.notes
523+
if n.note.get(f"{column}") == weight_conditions[-1]
524+
]
513525
second_studyset = studyset.slice(analyses=second_analysis_ids)
514526
second_studyset = second_studyset.combine_analyses()
515527

516528
return first_studyset, second_studyset
517529

518530
elif len(conditions) <= 1 and database_studyset:
531+
# collect user study IDs cheaply before loading the large reference database
532+
study_ids = set(studyset.study_ids)
533+
519534
# Download the gzip file
520535
response = requests.get(self.reference_studysets[database_studyset])
521536

@@ -537,26 +552,17 @@ def apply_filter(self, studyset, annotation):
537552
# Load the JSON data into a dictionary
538553
reference_studyset_dict = json.loads(json_data)
539554

540-
reference_studyset = Studyset(reference_studyset_dict, target=self._TARGET_SPACE)
555+
# pre-filter at the dict level to exclude user studies before constructing
556+
# Studyset, keeping the object small and avoiding expensive materialize calls
557+
reference_studyset_dict["studies"] = [
558+
s for s in reference_studyset_dict.get("studies", [])
559+
if s["id"] not in study_ids
560+
]
541561

562+
reference_studyset = Studyset(reference_studyset_dict, target=self._TARGET_SPACE)
542563
del reference_studyset_dict
543-
# get study ids from studyset
544-
study_ids = set([s.id for s in studyset.studies])
545-
546-
# reference study ids
547-
reference_study_ids = set([s.id for s in reference_studyset.studies])
548-
549-
keep_study_ids = reference_study_ids - study_ids
550564

551-
# get analysis ids from reference studyset
552-
analysis_ids = [
553-
a.id
554-
for s in reference_studyset.studies
555-
for a in s.analyses
556-
if s.id in keep_study_ids
557-
]
558-
second_studyset = reference_studyset.slice(analyses=analysis_ids)
559-
second_studyset = second_studyset.combine_analyses()
565+
second_studyset = reference_studyset.combine_analyses()
560566

561567
return first_studyset, second_studyset
562568

@@ -591,9 +597,9 @@ def create_result_object(self):
591597
existing_payload,
592598
existing_id,
593599
):
594-
data[f"cached_{entity_name}"] = existing_id
600+
data[f"snapshot_{entity_name}_id"] = existing_id
595601
else:
596-
data[f"{entity_name}_snapshot"] = live_payload
602+
data[f"snapshot_{entity_name}"] = live_payload
597603

598604
resp = requests.post(
599605
f"{self.compose_url}/meta-analysis-results",

0 commit comments

Comments
 (0)