@@ -286,9 +286,93 @@ def _specs_depending_on_option(full: bool, gif: bool) -> str:
286286 return "mandatory_specs"
287287
288288
289+ def _load_clinical_data_list (cdt_path : Path , specs_df : pd .DataFrame ) -> List [str ]:
290+ """Load the list of clinical data fields selected by the user from a txt file.
291+
292+ Parameters
293+ ----------
294+ cdt_path: Path
295+ TXT file containing the data fields the user wishes to have from the excel spreadsheets
296+
297+ specs_df: pd.DataFrame
298+ Dataframe loaded from the specifications
299+
300+ Returns
301+ -------
302+ List[str]
303+ List of selected clinical data fields
304+ """
305+ clinical_data_list = []
306+
307+ specs_values = {
308+ str (value ).strip () for value in specs_df .to_numpy ().ravel () if value != ""
309+ }
310+
311+ with open (cdt_path , "r" , encoding = "utf-8" ) as f :
312+ for i , line in enumerate (f , start = 1 ):
313+ data = line .strip ()
314+
315+ if not data :
316+ continue # Skip empty lines
317+
318+ if data not in specs_values :
319+ raise ValueError (
320+ f"Error at line { i } : '{ data } ' not found in specifications."
321+ )
322+
323+ clinical_data_list .append (data )
324+
325+ if not clinical_data_list :
326+ raise ValueError ("`-clinical_data_txt/cdt` is empty (no valid entries found)." )
327+
328+ return clinical_data_list
329+
330+
331+ def _merge_clinical_data_list_into_df (
332+ clinical_data_list : List [str ], specs_df : pd .DataFrame , df_to_complete : pd .DataFrame
333+ ) -> pd .DataFrame :
334+ """Merge clinical data list into a specs like dataframe to complete.
335+
336+ Parameters
337+ ----------
338+ clinical_data_list: List[str]
339+ List of selected clinical data fields
340+
341+ specs_df: Path
342+ Dataframe loaded from the specifications
343+
344+ df_to_complete: pd.DataFrame
345+ Specs like dataframe to complete
346+
347+ Returns
348+ -------
349+ pd.DataFrame
350+ Dataframe to complete
351+ """
352+ for value in clinical_data_list :
353+ for column_name in df_to_complete .columns :
354+ column_values = df_to_complete [column_name ].astype (str ).tolist ()
355+
356+ if value in column_values :
357+ break
358+
359+ if value in specs_df [column_name ].values :
360+ last_valid_idx = df_to_complete [column_name ].last_valid_index ()
361+
362+ next_idx = last_valid_idx + 1
363+
364+ if next_idx < len (df_to_complete ):
365+ df_to_complete .loc [next_idx , column_name ] = value
366+
367+ else :
368+ df_to_complete .loc [len (df_to_complete ), column_name ] = value
369+
370+ return df_to_complete
371+
372+
289373def prepare_dataset_to_bids_format (
290374 complete_data_df : pd .DataFrame ,
291- path_to_clinical_tsv : Path ,
375+ path_to_clinical_txt : Path ,
292376 gif : bool = False ,
293377 full : bool = False ,
294378) -> Dict [str , pd .DataFrame ]:
@@ -299,8 +383,8 @@ def prepare_dataset_to_bids_format(
299383 complete_data_df: pd.DataFrame
300384 Dataframe containing the merged data extracted from the raw images and the clinical data
301385
302- path_to_clinical_tsv : Path
303- TSV file containing the data fields the user wishes to have from the excel spreadsheets
386+ path_to_clinical_txt : Path
387+ TXT file containing the data fields the user wishes to have from the excel spreadsheets
304388
305389 gif: bool
306390 False by default. If True, indicates the user wants to get all clinical data fields
@@ -328,23 +412,30 @@ def prepare_dataset_to_bids_format(
328412 sep = ";" ,
329413 )
330414
331- # add additional data through csv
332- if path_to_clinical_tsv :
333- additional_data_df = pd .read_csv (path_to_clinical_tsv , sep = "\t " )
334- data_mapping = pd .read_csv (Path (__file__ ).parent / "data_mapping.tsv" , sep = "\t " )
335- pre_addi_df = data_mapping .merge (additional_data_df , how = "inner" , on = "data" )
336- addi_df = pd .DataFrame (
337- [
338- pre_addi_df ["data" ][pre_addi_df ["dest" ] == x ].values .tolist ()
339- for x in ("participants" , "sessions" , "scans" )
340- ]
341- ).transpose ()
342- addi_df .columns = ["participants" , "sessions" , "scans" ]
343- df_to_write = pd .concat ([specifications , addi_df ])
344- else :
345- df_to_write = specifications
415+ if path_to_clinical_txt :
416+ if full :
417+ cprint (
418+ msg = (
419+ "The `-full` flag is being used, "
420+ "using the `-clinical_data_txt/-cdt` option is redundant and will be ignored."
421+ ),
422+ lvl = "warning" ,
423+ )
424+
425+ else :
426+ full_specs = pd .read_csv (
427+ Path (__file__ ).parent / "specifications/full_specs.csv" ,
428+ sep = ";" ,
429+ )
430+
431+ specifications = _merge_clinical_data_list_into_df (
432+ _load_clinical_data_list (path_to_clinical_txt , full_specs ),
433+ full_specs ,
434+ specifications .copy (),
435+ )
436+
346437 return {
347- col : complete_data_df .filter (items = list (df_to_write [col ]))
438+ col : complete_data_df .filter (items = list (specifications [col ]))
348439 for col in ["participants" , "sessions" , "scans" ]
349440 }
350441
0 commit comments