33
44import numpy as np
55import pandas as pd
6+ from fsspec .implementations .local import LocalFileSystem
7+
8+ from clinica .converters ._utils import write_to_tsv
9+ from clinica .dataset .bids ._filename import Extension
10+ from clinica .utils .stream import cprint
611
712__all__ = [
813 "find_clinical_data" ,
@@ -303,17 +308,21 @@ def write_bids(
303308 scans : pd .DataFrame ,
304309 dataset_directory : Path ,
305310) -> None :
306- from fsspec .implementations .local import LocalFileSystem
311+ fs = LocalFileSystem (auto_mkdir = True )
312+ _write_description_and_participants (participants , to , fs )
313+ _write_sessions (sessions , to , fs )
314+ _write_images (scans , to , dataset_directory , fs )
315+ _write_scans (scans , to )
307316
308- from clinica .converters ._utils import write_to_tsv
317+
318+ def _write_description_and_participants (
319+ participants : pd .DataFrame , to : Path , fs : LocalFileSystem
320+ ):
309321 from clinica .converters .study_models import StudyName
310322 from clinica .dataset import BIDSDatasetDescription
311323
312- fs = LocalFileSystem (auto_mkdir = True )
313-
314- participants = participants .droplevel (
315- ["sessions" , "modality" , "bids_filename" ]
316- ).drop_duplicates ()
324+ participants = participants .droplevel (["sessions" , "modality" , "bids_filename" ])
325+ participants = participants .loc [~ participants .index .duplicated (keep = "first" )]
317326
318327 # Ensure BIDS hierarchy is written first.
319328 with fs .transaction :
@@ -326,72 +335,79 @@ def write_bids(
326335 with fs .open (str (to / "participants.tsv" ), "w" ) as participant_file :
327336 write_to_tsv (participants , participant_file )
328337
338+
def _write_sessions(sessions: pd.DataFrame, to: Path, fs: LocalFileSystem):
    """Write one ``<participant_id>_sessions.tsv`` file per participant.

    The sessions dataframe is grouped on ``participant_id``. For each group,
    the ``participant_id``, ``modality`` and ``bids_filename`` index levels
    are dropped, duplicated rows are removed, and the remaining index is
    named ``session_id`` before the table is serialized with ``write_to_tsv``.

    Parameters
    ----------
    sessions : pd.DataFrame
        Sessions metadata. It must expose ``participant_id``, ``modality``
        and ``bids_filename`` as index levels, since they are dropped by name.

    to : Path
        Root folder of the BIDS dataset being written.

    fs : LocalFileSystem
        Filesystem used to open the output files. ``write_bids`` passes an
        instance built with ``auto_mkdir=True``.
    """
    levels_to_drop = ["participant_id", "modality", "bids_filename"]
    for participant_id, participant_rows in sessions.groupby("participant_id"):
        sessions_table = participant_rows.droplevel(levels_to_drop).drop_duplicates()
        # The index left after dropping the levels is the one written as the
        # first column of the TSV, hence the explicit name.
        sessions_table.index.name = "session_id"
        target = to / str(participant_id) / f"{participant_id}_sessions.tsv"
        with fs.open(str(target), "w") as stream:
            write_to_tsv(sessions_table, stream)
337348
338- scans = scans .reset_index ().set_index (["bids_full_path" ], verify_integrity = True )
339349
def _write_images(scans: pd.DataFrame, to: Path, source: Path, fs: LocalFileSystem):
    """Install every scan listed in ``scans`` into the BIDS dataset.

    Rows whose ``modality_num`` is neither ``"20217"`` nor ``"20225"`` are
    copied out of their source zip file together with their sidecars. The two
    remaining modalities are handed to ``_convert_dicom_to_nifti`` instead,
    and ``"20217"`` additionally triggers ``_import_event_tsv``.

    Parameters
    ----------
    scans : pd.DataFrame
        Scans metadata. After ``reset_index()`` it must provide the columns
        ``bids_full_path``, ``modality_num``, ``source_zipfile``,
        ``source_filename`` and ``sidecars``.

    to : Path
        Root folder of the BIDS dataset being written.

    source : Path
        Folder containing the source zip files.

    fs : LocalFileSystem
        Filesystem forwarded to the conversion and event-import helpers.

    Raises
    ------
    ValueError
        If ``bids_full_path`` contains duplicates (``verify_integrity=True``).
    """
    # NOTE(review): these look like the modalities shipped as DICOM archives;
    # confirm the meaning of the two codes against the study documentation.
    converted_modalities = ("20217", "20225")

    indexed_scans = scans.reset_index().set_index(
        ["bids_full_path"], verify_integrity=True
    )
    for bids_full_path, metadata in indexed_scans.iterrows():
        modality = metadata["modality_num"]
        archive = source / metadata["source_zipfile"]
        destination = to / bids_full_path

        if modality not in converted_modalities:
            _copy_file_to_bids(
                zipfile=archive,
                filenames=[metadata["source_filename"]] + metadata["sidecars"],
                bids_path=destination,
            )
            continue

        _convert_dicom_to_nifti(zipfiles=archive, bids_path=destination, fs=fs)
        if modality == "20217":
            _import_event_tsv(bids_path=to, fs=fs)
381368
def _get_extensions_from_sidecars(sidecars: list[str]) -> list[str]:
    """Return the extensions to list in scans.tsv for one image.

    Each sidecar name is converted to an ``Extension``. Names that cannot be
    converted are reported with a warning and skipped. ``.nii.gz`` is always
    appended as the last element.

    Parameters
    ----------
    sidecars : list of str
        Names of the sidecar files attached to the image.

    Returns
    -------
    list
        One ``Extension`` per valid sidecar, followed by
        ``Extension(".nii.gz")``.
    """
    found = []
    for sidecar in sidecars:
        try:
            # Only the component right after the first dot is used.
            # NOTE(review): a multi-part suffix such as ".tsv.gz" would be
            # reduced to ".tsv" here; confirm this is intended.
            suffix = "." + sidecar.split(".")[1]
            found.append(Extension(suffix))
        except (ValueError, IndexError):
            cprint(
                "An invalid extension for bids files was found and won't be registered in scans.tsv. Please check your files.",
                lvl="warning",
            )
    found.append(Extension(".nii.gz"))
    return found
382380
383- def _serialize_row (row : pd .Series , write_column_names : bool ) -> str :
384- row_dict = row .to_dict ()
385- to_write = (
386- [row_dict .keys (), row_dict .values ()]
387- if write_column_names
388- else [row_dict .values ()]
389- )
390- return "\n " .join ([_serialize_list (list (_ )) for _ in to_write ])
391381
def _write_scans(scans: pd.DataFrame, to: Path) -> None:
    """Write one ``*_scans.tsv`` file per (participant, session) pair.

    The scans dataframe is grouped on ``participant_id`` and ``sessions``.
    For every row of a group, one ``filename`` entry is produced per
    extension returned by ``_get_extensions_from_sidecars``. Each entry has
    the form ``<parent folder name>/<file name><extension>``, built from the
    row's ``bids_full_path``. The result is written as a single-column,
    tab-separated file at
    ``<to>/<participant_id>/<sessions>/<participant_id>_<sessions>_scans.tsv``.

    Parameters
    ----------
    scans : pd.DataFrame
        Scans metadata. It must be groupable on ``participant_id`` and
        ``sessions`` and provide the columns ``bids_full_path`` and
        ``sidecars``.

    to : Path
        Root folder of the BIDS dataset being written.

    Notes
    -----
    This function does not create the session folder. In ``write_bids`` it
    is called after ``_write_images``, which presumably has already created
    it (to be confirmed against the image-writing helpers).
    """
    for (participant_id, session_id), data in scans.groupby(
        ["participant_id", "sessions"]
    ):
        # Collect every filename first and build the frame once: growing a
        # DataFrame with pd.concat inside the loop is quadratic and relies on
        # concatenating to an empty frame.
        filenames = []
        for full_path, sidecars in zip(data["bids_full_path"], data["sidecars"]):
            bids_path = Path(full_path)
            filename_no_extension = f"{bids_path.parent.name}/{bids_path.name}"
            filenames.extend(
                filename_no_extension + extension
                for extension in _get_extensions_from_sidecars(sidecars)
            )

        # str() keeps path building consistent with _write_sessions and
        # tolerates non-string group keys.
        scans_filepath = (
            to
            / str(participant_id)
            / str(session_id)
            / f"{participant_id}_{session_id}_scans.tsv"
        )
        pd.DataFrame({"filename": filenames}).to_csv(
            scans_filepath,
            sep="\t",
            index=False,
        )
395411
396412
397413def _copy_file_to_bids (zipfile : Path , filenames : List [Path ], bids_path : Path ) -> None :
@@ -408,7 +424,9 @@ def _copy_file_to_bids(zipfile: Path, filenames: List[Path], bids_path: Path) ->
408424 f .write (fs .cat (filename ))
409425
410426
411- def _convert_dicom_to_nifti (zipfiles : Path , bids_path : Path ) -> None :
427+ def _convert_dicom_to_nifti (
428+ zipfiles : Path , bids_path : Path , fs : LocalFileSystem
429+ ) -> None :
412430 """Install the requested files in the BIDS dataset.
413431 First, the dicom is extracted in a temporary directory
414432 Second, the dicom extracted is converted in the right place using dcm2niix"""
@@ -418,10 +436,6 @@ def _convert_dicom_to_nifti(zipfiles: Path, bids_path: Path) -> None:
418436 import zipfile
419437 from pathlib import PurePath
420438
421- from fsspec .implementations .local import LocalFileSystem
422-
423- fs = LocalFileSystem (auto_mkdir = True )
424-
425439 zf = zipfile .ZipFile (zipfiles )
426440 try :
427441 bids_path .parent .mkdir (exist_ok = True , parents = True )
@@ -430,15 +444,9 @@ def _convert_dicom_to_nifti(zipfiles: Path, bids_path: Path) -> None:
430444 pass
431445 with tempfile .TemporaryDirectory () as tempdir :
432446 zf .extractall (tempdir )
433- command = [
434- "dcm2niix" ,
435- "-w" ,
436- "0" ,
437- ]
438- command += ["-9" , "-z" , "y" ]
439- command += ["-b" , "y" , "-ba" , "y" ]
440- command += [tempdir ]
441- subprocess .run (command )
447+ subprocess .run (
448+ ["dcm2niix" , "-w" , "0" , "-9" , "-z" , "y" , "-b" , "y" , "-ba" , "y" , tempdir ]
449+ )
442450 fmri_image_path = _find_largest_imaging_data (Path (tempdir ))
443451 fmri_image_path = fmri_image_path or ""
444452 fs .copy (str (fmri_image_path ), str (bids_path ) + ".nii.gz" )
@@ -485,11 +493,8 @@ def _select_sessions(x: pd.Series) -> Optional[float]:
485493 return None
486494
487495
488- def _import_event_tsv (bids_path : Path ) -> None :
496+ def _import_event_tsv (bids_path : Path , fs : LocalFileSystem ) -> None :
489497 """Import the csv containing the events' information."""
490- from fsspec .implementations .local import LocalFileSystem
491-
492- fs = LocalFileSystem (auto_mkdir = True )
493498 event_tsv = (
494499 Path (__file__ ).parents [2 ]
495500 / "resources"
0 commit comments