@@ -136,6 +136,14 @@ def _cached_default_masker(target):
136136
137137def _apply_annotation_payloads (source_dict , annotation_payloads ):
138138 """Apply top-level annotation notes into analysis-level annotation dictionaries."""
139+ if annotation_payloads is None :
140+ return source_dict
141+
142+ annotation_payloads = _coerce_annotation_payloads (annotation_payloads )
143+ if not annotation_payloads :
144+ source_dict ["annotations" ] = []
145+ return source_dict
146+
139147 analysis_map = {}
140148 for study in source_dict .get ("studies" , []):
141149 for analysis in study .get ("analyses" , []):
@@ -247,12 +255,27 @@ def _build_tables_from_source(source_dict):
247255 studies_rows = []
248256 analyses_rows = []
249257 ids = []
250- coordinate_rows = []
251258 image_rows = []
252259 metadata_rows = []
253260 annotation_rows = []
254261 text_rows = []
255262
263+ # Coordinate rows: collected as parallel column-arrays for fast DataFrame construction.
264+ # Each POINT_RELATIONSHIP_COLUMNS column is collected as a list and only added when it holds at least one non-None value.
265+ # The resulting DataFrame is canonicalized by a stable sort on 'id', making
266+ # coordinate ordering explicit while preserving original order for rows with identical ids.
267+ coord_ids_acc : list = []
268+ coord_study_ids_acc : list = []
269+ coord_contrast_ids_acc : list = []
270+ coord_xs : list = []
271+ coord_ys : list = []
272+ coord_zs : list = []
273+ coord_spaces : list = []
274+ prc_lists : dict = {col : [] for col in POINT_RELATIONSHIP_COLUMNS }
275+ prc_seen : dict = {col : False for col in POINT_RELATIONSHIP_COLUMNS }
276+ # Truly sparse extras (point values, coordinate_metadata), stored as (row_index, {col: val}) tuples.
277+ coord_sparse_extras : list = []
278+
256279 for study in source_dict .get ("studies" , []):
257280 study_id = str (study ["id" ])
258281 studies_rows .append (
@@ -286,29 +309,30 @@ def _build_tables_from_source(source_dict):
286309 "journal" : study .get ("publication" , "" ),
287310 "name" : f"{ study_name } -{ analysis_name } " ,
288311 }
289- study_metadata = study .get ("metadata" , {} ) or {}
290- analysis_metadata = analysis .get ("metadata" , {} ) or {}
312+ study_metadata = study .get ("metadata" ) or {}
313+ analysis_metadata = analysis .get ("metadata" ) or {}
291314 coordinate_metadata , coordinate_metadata_keys = _extract_coordinate_row_metadata (
292315 analysis_metadata ,
293316 len (analysis .get ("points" , []) or []),
294317 )
295- combined_metadata = copy .deepcopy (study_metadata )
296- combined_metadata .update (copy .deepcopy (analysis_metadata ))
297- combined_metadata .pop ("sample_sizes" , None )
298- combined_metadata .pop ("sample_size" , None )
299- for key in coordinate_metadata_keys :
300- combined_metadata .pop (key , None )
301- sample_sizes = _extract_coerced_sample_sizes (
302- [
303- ("sample_sizes" , analysis_metadata .get ("sample_sizes" )),
304- ("sample_size" , analysis_metadata .get ("sample_size" )),
305- ("sample_sizes" , study_metadata .get ("sample_sizes" )),
306- ("sample_size" , study_metadata .get ("sample_size" )),
307- ]
308- )
309- if sample_sizes :
310- combined_metadata ["sample_sizes" ] = sample_sizes
311- metadata_row .update (combined_metadata )
318+ if study_metadata or analysis_metadata :
319+ combined_metadata = copy .deepcopy (study_metadata )
320+ combined_metadata .update (copy .deepcopy (analysis_metadata ))
321+ combined_metadata .pop ("sample_sizes" , None )
322+ combined_metadata .pop ("sample_size" , None )
323+ for key in coordinate_metadata_keys :
324+ combined_metadata .pop (key , None )
325+ sample_sizes = _extract_coerced_sample_sizes (
326+ [
327+ ("sample_sizes" , analysis_metadata .get ("sample_sizes" )),
328+ ("sample_size" , analysis_metadata .get ("sample_size" )),
329+ ("sample_sizes" , study_metadata .get ("sample_sizes" )),
330+ ("sample_size" , study_metadata .get ("sample_size" )),
331+ ]
332+ )
333+ if sample_sizes :
334+ combined_metadata ["sample_sizes" ] = sample_sizes
335+ metadata_row .update (combined_metadata )
312336 metadata_rows .append (metadata_row )
313337
314338 annotation_row = dict (base_row )
@@ -320,7 +344,9 @@ def _build_tables_from_source(source_dict):
320344 annotation_rows .append (annotation_row )
321345
322346 text_row = dict (base_row )
323- text_row .update (copy .deepcopy (analysis .get ("texts" , {}) or {}))
347+ texts = analysis .get ("texts" ) or {}
348+ if texts :
349+ text_row .update (copy .deepcopy (texts ))
324350 text_rows .append (text_row )
325351
326352 image_row = dict (base_row )
@@ -340,37 +366,68 @@ def _build_tables_from_source(source_dict):
340366
341367 for i_point , point in enumerate (analysis .get ("points" , []) or []):
342368 coords = point .get ("coordinates" , [None , None , None ])
343- coordinate_row = {
344- ** base_row ,
345- "x" : float (coords [0 ]),
346- "y" : float (coords [1 ]),
347- "z" : float (coords [2 ]),
348- "space" : point .get ("space" ),
349- }
350- for column in POINT_RELATIONSHIP_COLUMNS :
351- value = point .get (column )
352- if value is not None :
353- coordinate_row [column ] = value
354-
369+ coord_ids_acc .append (full_id )
370+ coord_study_ids_acc .append (study_id )
371+ coord_contrast_ids_acc .append (contrast_id )
372+ coord_xs .append (coords [0 ])
373+ coord_ys .append (coords [1 ])
374+ coord_zs .append (coords [2 ])
375+ coord_spaces .append (point .get ("space" ))
376+
377+ for col in POINT_RELATIONSHIP_COLUMNS :
378+ val = point .get (col )
379+ prc_lists [col ].append (val )
380+ if val is not None :
381+ prc_seen [col ] = True
382+
383+ extra : dict = {}
355384 for point_value in point .get ("values" , []) or []:
356385 if not isinstance (point_value , dict ):
357386 continue
358387 column = _point_value_kind_to_coordinate_column (point_value .get ("kind" ))
359388 value = point_value .get ("value" )
360389 if column is not None and value is not None :
361- coordinate_row [column ] = value
362-
390+ extra [column ] = value
363391 for column , values in coordinate_metadata .items ():
364- coordinate_row [column ] = values [i_point ]
365-
366- coordinate_rows .append (coordinate_row )
392+ extra [column ] = values [i_point ]
393+ if extra :
394+ coord_sparse_extras .append ((len (coord_ids_acc ) - 1 , extra ))
395+
396+ n_coord = len (coord_ids_acc )
397+ if n_coord :
398+ coord_frame : dict = {
399+ "id" : coord_ids_acc ,
400+ "study_id" : coord_study_ids_acc ,
401+ "contrast_id" : coord_contrast_ids_acc ,
402+ "x" : np .asarray (coord_xs , dtype = float ),
403+ "y" : np .asarray (coord_ys , dtype = float ),
404+ "z" : np .asarray (coord_zs , dtype = float ),
405+ "space" : coord_spaces ,
406+ }
407+ for col in POINT_RELATIONSHIP_COLUMNS :
408+ if prc_seen [col ]:
409+ coord_frame [col ] = prc_lists [col ]
410+ if coord_sparse_extras :
411+ extra_cols : dict = {}
412+ for row_idx , extra in coord_sparse_extras :
413+ for col , val in extra .items ():
414+ if col not in extra_cols :
415+ extra_cols [col ] = [None ] * n_coord
416+ extra_cols [col ][row_idx ] = val
417+ coord_frame .update (extra_cols )
418+ id_arr = np .asarray (coord_ids_acc , dtype = str )
419+ coord_frame ["id" ] = id_arr
420+ sort_order = np .argsort (id_arr , kind = "stable" )
421+ coord_df = pd .DataFrame (coord_frame ).iloc [sort_order ].reset_index (drop = True )
422+ else :
423+ coord_df = pd .DataFrame (columns = _ID_COLS + ["x" , "y" , "z" , "space" ])
367424
368425 ids = np .sort (np .asarray (ids , dtype = str ))
369426 return {
370427 "studies" : _rows_to_df (studies_rows , ["study_id" , "name" , "authors" , "publication" ]),
371428 "analyses" : _rows_to_df (analyses_rows , _ID_COLS + ["name" ]),
372429 "ids" : ids ,
373- "coordinates" : _rows_to_df ( coordinate_rows , _ID_COLS + [ "x" , "y" , "z" , "space" ]) ,
430+ "coordinates" : coord_df ,
374431 "images" : _rows_to_df (image_rows , _ID_COLS , normalize_none_strings = True ),
375432 "metadata" : _rows_to_df (metadata_rows , _ID_COLS , normalize_none_strings = True ),
376433 "annotations" : _rows_to_df (annotation_rows , _ID_COLS , normalize_none_strings = True ),
@@ -497,11 +554,8 @@ def from_source_dict(
497554 target ,
498555 harmonize_coordinates = harmonize_coordinates ,
499556 )
500- annotation_payloads = (
501- _coerce_annotation_payloads (annotation_payloads )
502- if annotation_payloads is not None
503- else _coerce_annotation_payloads (source_dict .get ("annotations" , []))
504- )
557+ if annotation_payloads is None :
558+ annotation_payloads = source_dict .get ("annotations" , [])
505559 source_dict = _apply_annotation_payloads (source_dict , annotation_payloads )
506560 return cls (
507561 source_dict ["id" ],
@@ -683,7 +737,7 @@ def selected_source_dict(self, selected_full_ids=None):
683737 }
684738
685739 if selected_full_ids is None :
686- return copy . deepcopy (source_dict )
740+ return _structural_copy_source_dict (source_dict )
687741
688742 selected_ids = set (self .selected_ids (selected_full_ids ).tolist ())
689743 if not selected_ids :
0 commit comments