@@ -311,13 +311,17 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None:
311311 stats = _get_samples (value , stats , hierarchy )
312312 break
313313
314+ # Track which components were already found, so that none is counted
315+ # more than once in some incorrectly named datasets
316+ found = {}
314317 for part in Path (assetmeta ["path" ]).name .split ("." )[0 ].split ("_" ):
315- if part .startswith ("sub-" ):
316- subject = part .replace ("sub-" , "" )
318+ if found . get ( "subject" ) and part .startswith ("sub-" ):
319+ found [ " subject" ] = subject = part .split ("sub-" , 1 )[ 1 ]
317320 if subject not in stats ["subjects" ]:
318321 stats ["subjects" ].append (subject )
319- if part .startswith ("sample-" ):
320- sample = part .replace ("sample-" , "" )
322+ found .add ("subject" )
323+ if not found .get ("sample" ) and part .startswith ("sample-" ):
324+ found ["sample" ] = sample = part .replace ("sample-" , "" )
321325 if sample not in stats ["tissuesample" ]:
322326 stats ["tissuesample" ].append (sample )
323327
@@ -338,10 +342,13 @@ def aggregate_assets_summary(metadata: Iterable[Dict[str, Any]]) -> dict:
338342 stats : _stats_type = {}
339343 for meta in metadata :
340344 _add_asset_to_stats (meta , stats )
341-
342345 stats ["numberOfBytes" ] = stats .get ("numberOfBytes" , 0 )
343346 stats ["numberOfFiles" ] = stats .get ("numberOfFiles" , 0 )
344347 stats ["numberOfSubjects" ] = len (stats .pop ("subjects" , [])) or None
348+ if stats ["numberOfSubjects" ]:
349+ # Must not happen. If it does, it is a bug in the software
350+ assert stats ["numberOfFiles" ]
351+ assert stats ["numberOfSubjects" ] <= stats ["numberOfFiles" ]
345352 stats ["numberOfSamples" ] = (
346353 len (stats .pop ("tissuesample" , [])) + len (stats .pop ("slice" , []))
347354 ) or None
0 commit comments