@@ -29,26 +29,29 @@ class Runner:
2929
3030 _TARGET_SPACE = "mni152_2mm"
3131
32- _ENTITY_SNAPSHOT_KEYS = {
33- "studyset" : ("studyset_snapshot" , "studyset" , "cached_studyset" ),
34- "annotation" : ("annotation_snapshot" , "annotation" , "cached_annotation" ),
32+ _ENTITY_SNAPSHOT_ID_KEYS = {
33+ "studyset" : ("snapshot_studyset_id" , ),
34+ "annotation" : ("snapshot_annotation_id" , ),
3535 }
3636 _ENTITY_STORE_PATHS = {
3737 "studyset" : "studysets" ,
3838 "annotation" : "annotations" ,
3939 }
4040 _ENTITY_SNAPSHOT_PATHS = {
41- "studyset" : "studysets" ,
42- "annotation" : "annotations" ,
41+ "studyset" : "snapshot- studysets" ,
42+ "annotation" : "snapshot- annotations" ,
4343 }
4444 _ENTITY_NEUROSTORE_KEYS = {
45- "studyset" : ("neurostore_studyset" , "neurostore_studyset_id" , "studyset" ),
45+ "studyset" : ("neurostore_studyset" , "neurostore_studyset_id" ),
4646 "annotation" : (
4747 "neurostore_annotation" ,
4848 "neurostore_annotation_id" ,
49- "annotation" ,
5049 ),
5150 }
51+ _ENTITY_SNAPSHOT_SUMMARY_KEYS = {
52+ "studyset" : ("neurostore_studyset" , "studysets" ),
53+ "annotation" : ("neurostore_annotation" , "annotations" ),
54+ }
5255 _ENTITY_COMPOSE_PATHS = {
5356 "studyset" : "neurostore-studysets" ,
5457 "annotation" : "neurostore-annotations" ,
@@ -79,8 +82,8 @@ def __init__(
7982 }
8083 elif environment == "staging" :
8184 # staging
82- self .compose_url = "https://synth.neurostore.xyz/api"
83- self .store_url = "https://neurostore.xyz/api"
85+ self .compose_url = "https://staging. synth.neurostore.xyz/api"
86+ self .store_url = "https://staging. neurostore.xyz/api"
8487 self .reference_studysets = {
8588 "neurosynth" : gen_database_url ("staging" , "neurosynth" ),
8689 "neuroquery" : gen_database_url ("staging" , "neuroquery" ),
@@ -206,7 +209,7 @@ def _get_result_documents(self, meta_analysis):
206209 result_id = result_ref
207210 result_doc = None
208211 elif isinstance (result_ref , dict ):
209- result_id = result_ref .get ("id" )
212+ result_id = result_ref .get ("id" ) or result_ref . get ( "result_id" )
210213 result_doc = result_ref
211214 else :
212215 continue
@@ -215,14 +218,7 @@ def _get_result_documents(self, meta_analysis):
215218 continue
216219 if result_id is not None :
217220 seen_ids .add (result_id )
218-
219- has_snapshot_payload = any (
220- isinstance (result_doc .get (key ), dict )
221- for key in self ._ENTITY_SNAPSHOT_KEYS ["studyset" ]
222- + self ._ENTITY_SNAPSHOT_KEYS ["annotation" ]
223- ) if isinstance (result_doc , dict ) else False
224-
225- if result_doc is None or (result_id is not None and not has_snapshot_payload ):
221+ if result_doc is None :
226222 if result_id is None :
227223 continue
228224 result_doc = self ._get_json (
@@ -254,24 +250,33 @@ def _get_entity_snapshot_record(self, entity_name, documents):
254250 for document in documents :
255251 if not isinstance (document , dict ):
256252 continue
257- for key in self ._ENTITY_SNAPSHOT_KEYS [entity_name ]:
258- snapshot_document = document .get (key )
259- payload = self ._unwrap_snapshot (snapshot_document )
260- if is_expected_snapshot (payload ):
261- return payload , self ._extract_document_id (snapshot_document )
262- snapshot_id = self ._extract_document_id (snapshot_document )
253+ snapshot_id = None
254+ for key in self ._ENTITY_SNAPSHOT_ID_KEYS [entity_name ]:
255+ snapshot_id = self ._extract_document_id (document .get (key ))
263256 if snapshot_id is None :
264257 continue
265- try :
266- snapshot_document = self ._get_json (
267- f"{ self .compose_url } /{ self ._ENTITY_SNAPSHOT_PATHS [entity_name ]} /{ snapshot_id } " ,
268- f"Could not download { entity_name } snapshot { snapshot_id } " ,
269- )
270- except requests .exceptions .HTTPError :
271- continue
272- payload = self ._unwrap_snapshot (snapshot_document )
273- if is_expected_snapshot (payload ):
274- return payload , self ._extract_document_id (snapshot_document ) or snapshot_id
258+ break
259+ if snapshot_id is None :
260+ ref_key , summary_key = self ._ENTITY_SNAPSHOT_SUMMARY_KEYS [entity_name ]
261+ ref_document = document .get (ref_key )
262+ if isinstance (ref_document , dict ):
263+ summary_documents = ref_document .get (summary_key ) or []
264+ for summary_document in summary_documents :
265+ snapshot_id = self ._extract_document_id (summary_document )
266+ if snapshot_id is not None :
267+ break
268+ if snapshot_id is None :
269+ continue
270+ try :
271+ snapshot_document = self ._get_json (
272+ f"{ self .compose_url } /{ self ._ENTITY_SNAPSHOT_PATHS [entity_name ]} /{ snapshot_id } " ,
273+ f"Could not download { entity_name } snapshot { snapshot_id } " ,
274+ )
275+ except requests .exceptions .HTTPError :
276+ continue
277+ payload = self ._unwrap_snapshot (snapshot_document )
278+ if is_expected_snapshot (payload ):
279+ return payload , snapshot_id
275280 return None , None
276281
277282 @staticmethod
@@ -505,17 +510,27 @@ def apply_filter(self, studyset, annotation):
505510 raise ValueError ("Cannot have multiple conditions and a database studyset." )
506511
507512 elif len (conditions ) == 2 and not database_studyset :
508- second_analysis_ids = [
509- n .analysis .id
510- for n in annotation .notes
511- if n .note .get (f"{ column } " ) == weight_conditions [- 1 ]
512- ]
513+ if column_type == "boolean" :
514+ second_analysis_ids = [
515+ n .analysis .id
516+ for n in annotation .notes
517+ if not n .note .get (f"{ column } " )
518+ ]
519+ else :
520+ second_analysis_ids = [
521+ n .analysis .id
522+ for n in annotation .notes
523+ if n .note .get (f"{ column } " ) == weight_conditions [- 1 ]
524+ ]
513525 second_studyset = studyset .slice (analyses = second_analysis_ids )
514526 second_studyset = second_studyset .combine_analyses ()
515527
516528 return first_studyset , second_studyset
517529
518530 elif len (conditions ) <= 1 and database_studyset :
531+ # collect user study IDs cheaply before loading the large reference database
532+ study_ids = set (studyset .study_ids )
533+
519534 # Download the gzip file
520535 response = requests .get (self .reference_studysets [database_studyset ])
521536
@@ -537,26 +552,17 @@ def apply_filter(self, studyset, annotation):
537552 # Load the JSON data into a dictionary
538553 reference_studyset_dict = json .loads (json_data )
539554
540- reference_studyset = Studyset (reference_studyset_dict , target = self ._TARGET_SPACE )
555+ # pre-filter at the dict level to exclude user studies before constructing
556+ # Studyset, keeping the object small and avoiding expensive materialize calls
557+ reference_studyset_dict ["studies" ] = [
558+ s for s in reference_studyset_dict .get ("studies" , [])
559+ if s ["id" ] not in study_ids
560+ ]
541561
562+ reference_studyset = Studyset (reference_studyset_dict , target = self ._TARGET_SPACE )
542563 del reference_studyset_dict
543- # get study ids from studyset
544- study_ids = set ([s .id for s in studyset .studies ])
545-
546- # reference study ids
547- reference_study_ids = set ([s .id for s in reference_studyset .studies ])
548-
549- keep_study_ids = reference_study_ids - study_ids
550564
551- # get analysis ids from reference studyset
552- analysis_ids = [
553- a .id
554- for s in reference_studyset .studies
555- for a in s .analyses
556- if s .id in keep_study_ids
557- ]
558- second_studyset = reference_studyset .slice (analyses = analysis_ids )
559- second_studyset = second_studyset .combine_analyses ()
565+ second_studyset = reference_studyset .combine_analyses ()
560566
561567 return first_studyset , second_studyset
562568
@@ -591,9 +597,9 @@ def create_result_object(self):
591597 existing_payload ,
592598 existing_id ,
593599 ):
594- data [f"cached_ { entity_name } " ] = existing_id
600+ data [f"snapshot_ { entity_name } _id " ] = existing_id
595601 else :
596- data [f"{ entity_name } _snapshot " ] = live_payload
602+ data [f"snapshot_ { entity_name } " ] = live_payload
597603
598604 resp = requests .post (
599605 f"{ self .compose_url } /meta-analysis-results" ,
0 commit comments