aramis-lab
diff --git a/‎clinica/converters/genfi_to_bids/_converter.py‎
Lines changed: 12 additions & 12 deletions b/‎clinica/converters/genfi_to_bids/_converter.py‎
Lines changed: 12 additions & 12 deletions
diff --git a/‎clinica/converters/genfi_to_bids/_utils.py‎
Lines changed: 110 additions & 19 deletions b/‎clinica/converters/genfi_to_bids/_utils.py‎
Lines changed: 110 additions & 19 deletions
diff --git a/‎clinica/converters/genfi_to_bids/cli.py‎
Lines changed: 7 additions & 7 deletions b/‎clinica/converters/genfi_to_bids/cli.py‎
Lines changed: 7 additions & 7 deletions
@@ -14,7 +14,7 @@ def convert(
     path_to_clinical: Optional[UserProvidedPath] = None,
     gif: Optional[bool] = False,
     full: Optional[bool] = False,
-    path_to_clinical_tsv: Optional[UserProvidedPath] = None,
+    path_to_clinical_txt: Optional[UserProvidedPath] = None,
     subjects: Optional[UserProvidedPath] = None,
     n_procs: Optional[int] = 1,
     **kwargs,
@@ -43,8 +43,8 @@ def convert(
     full: bool, optional
         If True, indicates the user wants to get all clinical data fields
 
-    path_to_clinical_tsv: Path, optional
-        The path to a TSV file containing the additional data the user wants to have in the BIDS output.
+    path_to_clinical_txt: Path, optional
+        The path to a TXT file containing the additional data the user wants to have in the BIDS output.
         If None, no additional data will be added.
 
     subjects : str or Path, optional
@@ -74,10 +74,10 @@ def convert(
     bids_dir = validate_input_path(bids_dir, check_exist=False)
     if path_to_clinical:
         path_to_clinical = validate_input_path(path_to_clinical)
-    if path_to_clinical_tsv:
-        path_to_clinical_tsv = validate_input_path(path_to_clinical_tsv)
+    if path_to_clinical_txt:
+        path_to_clinical_txt = validate_input_path(path_to_clinical_txt)
     check_software(ThirdPartySoftware.DCM2NIIX)
-    _check_clinical_path_inputs(path_to_clinical_tsv, path_to_clinical)
+    _check_clinical_path_inputs(path_to_clinical_txt, path_to_clinical)
     if subjects:
         cprint(
             (
@@ -96,7 +96,7 @@ def convert(
         clinical_data = parse_clinical_data(path_to_clinical)
         imaging_data = merge_imaging_and_clinical_data(imaging_data, clinical_data)
     results = prepare_dataset_to_bids_format(
-        imaging_data, path_to_clinical_tsv, gif, full
+        imaging_data, path_to_clinical_txt, gif, full
     )
     write_bids(
         to=bids_dir,
@@ -116,17 +116,17 @@ def convert(
     cprint("Conversion to BIDS succeeded.", lvl="info")
 
 
-def _check_clinical_path_inputs(path_to_clinical_tsv: Path, path_to_clinical: Path):
-    """Check that if a clinical tsv is given, a path to the clinical data is given as well."""
+def _check_clinical_path_inputs(path_to_clinical_txt: Path, path_to_clinical: Path):
+    """Check that if a clinical txt is given, a path to the clinical data is given as well."""
     from clinica.converters.factory import get_converter_name
     from clinica.converters.study_models import StudyName
     from clinica.utils.stream import cprint
 
-    if path_to_clinical_tsv and not path_to_clinical:
+    if path_to_clinical_txt and not path_to_clinical:
         msg = (
             f"The {get_converter_name(StudyName.GENFI)} converter is unable to convert the clinical data because "
-            "the path to these data was not provided while a TSV file with additional "
-            f"data was given ({path_to_clinical_tsv}). You can either use the appropriate "
+            "the path to these data was not provided while a TXT file with additional "
+            f"data was given ({path_to_clinical_txt}). You can either use the appropriate "
             "option from the clinica command line interface to provide the missing path, "
             "or chose to not convert clinical data at all."
         )
 
@@ -286,9 +286,93 @@ def _specs_depending_on_option(full: bool, gif: bool) -> str:
     return "mandatory_specs"
 
 
+def _load_clinical_data_list(cdt_path: Path, specs_df: pd.DataFrame) -> List[str]:
+    """Load the list of clinical data fields selected by the user from a txt file.
+
+    The file is read line by line. Each non-empty line, stripped of its
+    surrounding whitespace, is taken as one field name and must be equal to
+    one of the (stripped) cell values of ``specs_df``. The order of the lines
+    is preserved and duplicates are not removed.
+
+    Parameters
+    ----------
+    cdt_path: Path
+        TXT file containing the data fields the user wishes to have from the excel spreadsheets
+
+    specs_df: pd.DataFrame
+        Dataframe loaded from the specifications
+
+    Returns
+    -------
+    List[str]
+        List of selected clinical data fields
+
+    Raises
+    ------
+    ValueError
+        If a non-empty line is not a value of ``specs_df``, or if the file
+        contains no non-empty line at all.
+    """
+    # Missing cells have to be dropped explicitly: NaN compares unequal to ""
+    # and `str(nan)` would otherwise register the bogus field name "nan".
+    specs_values = {
+        str(value).strip() for value in specs_df.to_numpy().ravel() if pd.notna(value)
+    }
+    specs_values.discard("")
+
+    clinical_data_list = []
+
+    # "utf-8-sig" reads plain UTF-8 as well, but also drops a leading BOM,
+    # which `str.strip()` does not remove and which would make the first
+    # field of the file fail the lookup below.
+    with open(cdt_path, "r", encoding="utf-8-sig") as f:
+        for i, line in enumerate(f, start=1):
+            data = line.strip()
+
+            if not data:
+                continue  # Skip empty lines
+
+            if data not in specs_values:
+                raise ValueError(
+                    f"Error at line {i}: '{data}' not found in specifications."
+                )
+
+            clinical_data_list.append(data)
+
+    if not clinical_data_list:
+        raise ValueError("`-clinical_data_txt/cdt` is empty (no valid entries found).")
+
+    return clinical_data_list
+
+
+def _merge_clinical_data_list_into_df(
+    clinical_data_list: List[str], specs_df: pd.DataFrame, df_to_complete: pd.DataFrame
+) -> pd.DataFrame:
+    """Merge clinical data list into a specs like dataframe to complete.
+
+    For every requested field, the columns of ``df_to_complete`` are scanned in
+    order. If a column already contains the field, the scan for that field
+    stops. Otherwise, when the same-named column of ``specs_df`` lists the
+    field, the field is written right after the last non-missing entry of the
+    column, adding a new row when the column is already full.
+
+    ``df_to_complete`` is modified in place and also returned. Row positions
+    are computed as ``last valid label + 1``, so the frame is assumed to carry
+    a default 0..n-1 integer index.
+
+    Parameters
+    ----------
+    clinical_data_list: List[str]
+        List of selected clinical data fields
+
+    specs_df: pd.DataFrame
+        Dataframe loaded from the specifications
+
+    df_to_complete: pd.DataFrame
+        Specs like dataframe to complete
+
+    Returns
+    -------
+    pd.DataFrame
+        Dataframe to complete
+    """
+    for value in clinical_data_list:
+        for column_name in df_to_complete.columns:
+            column_values = df_to_complete[column_name].astype(str).tolist()
+
+            if value in column_values:
+                break
+
+            # A column unknown to the specifications cannot receive any field.
+            if column_name not in specs_df.columns:
+                continue
+
+            if value not in specs_df[column_name].values:
+                continue
+
+            last_valid_idx = df_to_complete[column_name].last_valid_index()
+
+            if last_valid_idx is None:
+                # Empty or all-missing column: `last_valid_index()` gives None,
+                # so start filling from the first row. The column is cast to
+                # object so that it can hold the string, whatever dtype the
+                # missing values were stored with.
+                df_to_complete[column_name] = df_to_complete[column_name].astype(
+                    object
+                )
+                next_idx = 0
+            else:
+                next_idx = last_valid_idx + 1
+
+            # Writing at label `len(df_to_complete)` appends a new row.
+            target_idx = min(next_idx, len(df_to_complete))
+            df_to_complete.loc[target_idx, column_name] = value
+
+    return df_to_complete
+
+
 def prepare_dataset_to_bids_format(
     complete_data_df: pd.DataFrame,
-    path_to_clinical_tsv: Path,
+    path_to_clinical_txt: Path,
     gif: bool = False,
     full: bool = False,
 ) -> Dict[str, pd.DataFrame]:
@@ -299,8 +383,8 @@ def prepare_dataset_to_bids_format(
     complete_data_df: pd.DataFrame
         Dataframe containing the merged data extracted from the raw images and the clinical data
 
-    path_to_clinical_tsv: Path
-        TSV file containing the data fields the user wishes to have from the excel spreadsheets
+    path_to_clinical_txt: Path
+        TXT file containing the data fields the user wishes to have from the excel spreadsheets
 
     gif: bool
         False by default. If True, indicates the user wants to get all clinical data fields
@@ -328,23 +412,30 @@ def prepare_dataset_to_bids_format(
         sep=";",
     )
 
-    # add additional data through csv
-    if path_to_clinical_tsv:
-        additional_data_df = pd.read_csv(path_to_clinical_tsv, sep="\t")
-        data_mapping = pd.read_csv(Path(__file__).parent / "data_mapping.tsv", sep="\t")
-        pre_addi_df = data_mapping.merge(additional_data_df, how="inner", on="data")
-        addi_df = pd.DataFrame(
-            [
-                pre_addi_df["data"][pre_addi_df["dest"] == x].values.tolist()
-                for x in ("participants", "sessions", "scans")
-            ]
-        ).transpose()
-        addi_df.columns = ["participants", "sessions", "scans"]
-        df_to_write = pd.concat([specifications, addi_df])
-    else:
-        df_to_write = specifications
+    if path_to_clinical_txt:
+        if full:
+            cprint(
+                msg=(
+                    "The `-full` flag is being used, "
+                    "using the `-clinical_data_txt/-cdt` option is redundant and will be ignored."
+                ),
+                lvl="warning",
+            )
+
+        else:
+            full_specs = pd.read_csv(
+                Path(__file__).parent / "specifications/full_specs.csv",
+                sep=";",
+            )
+
+            specifications = _merge_clinical_data_list_into_df(
+                _load_clinical_data_list(path_to_clinical_txt, full_specs),
+                full_specs,
+                specifications.copy(),
+            )
+
     return {
-        col: complete_data_df.filter(items=list(df_to_write[col]))
+        col: complete_data_df.filter(items=list(specifications[col]))
         for col in ["participants", "sessions", "scans"]
     }
 
 
@@ -21,12 +21,12 @@
     help="Add all clinical data (mandatory + optional) to sessions.tsv.",
 )
 
-clinical_data_tsv = click.option(
+clinical_data_txt = click.option(
     "-cdt",
-    "--clinical-data-tsv",
-    "clinical_data_tsv",
+    "--clinical-data-txt",
+    "clinical_data_txt",
     type=click.Path(exists=True, file_okay=True, resolve_path=True),
-    help="Path to a tsv file containing additional clinical data you want to have in the BIDS output.",
+    help="Path to a txt file containing additional clinical data you want to have in the BIDS output.",
 )
 
 
@@ -36,12 +36,12 @@
 @clinical_data_directory
 @gif
 @full
-@clinical_data_tsv
+@clinical_data_txt
 def cli(
     dataset_directory: PathLike,
     bids_directory: PathLike,
     clinical_data_directory: Optional[PathLike] = None,
-    clinical_data_tsv: Optional[PathLike] = None,
+    clinical_data_txt: Optional[PathLike] = None,
     gif: Optional[bool] = False,
     full: Optional[bool] = False,
 ) -> None:
@@ -58,7 +58,7 @@ def cli(
         clinical_data_directory,
         gif=gif,
         full=full,
-        path_to_clinical_tsv=clinical_data_tsv,
+        path_to_clinical_txt=clinical_data_txt,
     )