Skip to content

Commit 9720825

Browse files
ISMAILI Adam
authored and committed
cdt option modified and tests added
1 parent 7a4adfb commit 9720825

File tree

6 files changed

+278
-1218
lines changed

6 files changed

+278
-1218
lines changed

clinica/converters/genfi_to_bids/_converter.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def convert(
1414
path_to_clinical: Optional[UserProvidedPath] = None,
1515
gif: Optional[bool] = False,
1616
full: Optional[bool] = False,
17-
path_to_clinical_tsv: Optional[UserProvidedPath] = None,
17+
path_to_clinical_txt: Optional[UserProvidedPath] = None,
1818
subjects: Optional[UserProvidedPath] = None,
1919
n_procs: Optional[int] = 1,
2020
**kwargs,
@@ -43,8 +43,8 @@ def convert(
4343
full: bool, optional
4444
If True, indicates the user wants to get all clinical data fields
4545
46-
path_to_clinical_tsv: Path, optional
47-
The path to a TSV file containing the additional data the user wants to have in the BIDS output.
46+
path_to_clinical_txt: Path, optional
47+
The path to a TXT file containing the additional data the user wants to have in the BIDS output.
4848
If None, no additional data will be added.
4949
5050
subjects : str or Path, optional
@@ -74,10 +74,10 @@ def convert(
7474
bids_dir = validate_input_path(bids_dir, check_exist=False)
7575
if path_to_clinical:
7676
path_to_clinical = validate_input_path(path_to_clinical)
77-
if path_to_clinical_tsv:
78-
path_to_clinical_tsv = validate_input_path(path_to_clinical_tsv)
77+
if path_to_clinical_txt:
78+
path_to_clinical_txt = validate_input_path(path_to_clinical_txt)
7979
check_software(ThirdPartySoftware.DCM2NIIX)
80-
_check_clinical_path_inputs(path_to_clinical_tsv, path_to_clinical)
80+
_check_clinical_path_inputs(path_to_clinical_txt, path_to_clinical)
8181
if subjects:
8282
cprint(
8383
(
@@ -96,7 +96,7 @@ def convert(
9696
clinical_data = parse_clinical_data(path_to_clinical)
9797
imaging_data = merge_imaging_and_clinical_data(imaging_data, clinical_data)
9898
results = prepare_dataset_to_bids_format(
99-
imaging_data, path_to_clinical_tsv, gif, full
99+
imaging_data, path_to_clinical_txt, gif, full
100100
)
101101
write_bids(
102102
to=bids_dir,
@@ -116,17 +116,17 @@ def convert(
116116
cprint("Conversion to BIDS succeeded.", lvl="info")
117117

118118

119-
def _check_clinical_path_inputs(path_to_clinical_tsv: Path, path_to_clinical: Path):
120-
"""Check that if a clinical tsv is given, a path to the clinical data is given as well."""
119+
def _check_clinical_path_inputs(path_to_clinical_txt: Path, path_to_clinical: Path):
120+
"""Check that if a clinical txt is given, a path to the clinical data is given as well."""
121121
from clinica.converters.factory import get_converter_name
122122
from clinica.converters.study_models import StudyName
123123
from clinica.utils.stream import cprint
124124

125-
if path_to_clinical_tsv and not path_to_clinical:
125+
if path_to_clinical_txt and not path_to_clinical:
126126
msg = (
127127
f"The {get_converter_name(StudyName.GENFI)} converter is unable to convert the clinical data because "
128-
"the path to these data was not provided while a TSV file with additional "
129-
f"data was given ({path_to_clinical_tsv}). You can either use the appropriate "
128+
"the path to these data was not provided while a TXT file with additional "
129+
f"data was given ({path_to_clinical_txt}). You can either use the appropriate "
130130
"option from the clinica command line interface to provide the missing path, "
131131
"or chose to not convert clinical data at all."
132132
)

clinica/converters/genfi_to_bids/_utils.py

Lines changed: 110 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -286,9 +286,93 @@ def _specs_depending_on_option(full: bool, gif: bool) -> str:
286286
return "mandatory_specs"
287287

288288

289+
def _load_clinical_data_list(cdt_path: Path, specs_df: pd.DataFrame) -> List[str]:
    """Load the list of clinical data fields selected by the user from a txt file.

    The file is read line by line: surrounding whitespace is stripped, blank
    lines are skipped, and every remaining line must match one of the values
    found in ``specs_df``.

    Parameters
    ----------
    cdt_path: Path
        TXT file containing the data fields the user wishes to have from the excel spreadsheets

    specs_df: pd.DataFrame
        Dataframe loaded from the specifications

    Returns
    -------
    List[str]
        List of selected clinical data fields, in the order they appear in the file.

    Raises
    ------
    ValueError
        If a non-blank line is not a value of ``specs_df``, or if the file
        contains no entry at all.
    """
    # Missing cells (NaN / None) must be dropped *before* the string
    # conversion; otherwise they become the literal "nan" / "None" and a user
    # line containing that text would wrongly pass validation.
    specs_values = {
        str(value).strip() for value in specs_df.to_numpy().ravel() if pd.notna(value)
    }
    specs_values.discard("")

    clinical_data_list = []

    # "utf-8-sig" reads plain UTF-8 unchanged and additionally drops a leading
    # byte-order mark, which would otherwise stay glued to the first field.
    with open(cdt_path, "r", encoding="utf-8-sig") as f:
        for i, line in enumerate(f, start=1):
            data = line.strip()

            if not data:
                continue  # Skip empty lines

            if data not in specs_values:
                raise ValueError(
                    f"Error at line {i}: '{data}' not found in specifications."
                )

            clinical_data_list.append(data)

    if not clinical_data_list:
        raise ValueError("`-clinical_data_txt/cdt` is empty (no valid entries found).")

    return clinical_data_list
329+
330+
331+
def _merge_clinical_data_list_into_df(
    clinical_data_list: List[str], specs_df: pd.DataFrame, df_to_complete: pd.DataFrame
) -> pd.DataFrame:
    """Merge clinical data list into a specs like dataframe to complete.

    For each requested value, the columns of ``df_to_complete`` are scanned in
    order. Scanning stops for that value as soon as a column already holds it.
    Otherwise, if the same-named column of ``specs_df`` contains the value, it
    is written right after the last non-missing cell of that column, adding a
    new row when the column is already full.

    Parameters
    ----------
    clinical_data_list: List[str]
        List of selected clinical data fields

    specs_df: pd.DataFrame
        Dataframe loaded from the specifications. It is indexed with the
        column names of ``df_to_complete``.

    df_to_complete: pd.DataFrame
        Specs like dataframe to complete. It is modified in place.
        NOTE(review): row positions are used as labels, so this assumes a
        default 0..n-1 integer index - confirm against callers.

    Returns
    -------
    pd.DataFrame
        The same ``df_to_complete`` object, completed with the requested values.
    """
    for value in clinical_data_list:
        for column_name in df_to_complete.columns:
            column_values = df_to_complete[column_name].astype(str).tolist()

            if value in column_values:
                break

            if value not in specs_df[column_name].values:
                continue

            last_valid_idx = df_to_complete[column_name].last_valid_index()

            if last_valid_idx is None:
                # The column holds no value yet: `None + 1` would raise a
                # TypeError, so start filling from the first row. An all-missing
                # column may carry a numeric dtype, hence the cast before
                # storing a string in it.
                df_to_complete[column_name] = df_to_complete[column_name].astype(
                    object
                )
                next_idx = 0
            else:
                next_idx = last_valid_idx + 1

            # Writing at label `len(df)` appends a new row when the column is full.
            target_idx = min(next_idx, len(df_to_complete))
            df_to_complete.loc[target_idx, column_name] = value

    return df_to_complete
371+
372+
289373
def prepare_dataset_to_bids_format(
290374
complete_data_df: pd.DataFrame,
291-
path_to_clinical_tsv: Path,
375+
path_to_clinical_txt: Path,
292376
gif: bool = False,
293377
full: bool = False,
294378
) -> Dict[str, pd.DataFrame]:
@@ -299,8 +383,8 @@ def prepare_dataset_to_bids_format(
299383
complete_data_df: pd.DataFrame
300384
Dataframe containing the merged data extracted from the raw images and the clinical data
301385
302-
path_to_clinical_tsv: Path
303-
TSV file containing the data fields the user wishes to have from the excel spreadsheets
386+
path_to_clinical_txt: Path
387+
TXT file containing the data fields the user wishes to have from the excel spreadsheets
304388
305389
gif: bool
306390
False by default. If True, indicates the user wants to get all clinical data fields
@@ -328,23 +412,30 @@ def prepare_dataset_to_bids_format(
328412
sep=";",
329413
)
330414

331-
# add additional data through csv
332-
if path_to_clinical_tsv:
333-
additional_data_df = pd.read_csv(path_to_clinical_tsv, sep="\t")
334-
data_mapping = pd.read_csv(Path(__file__).parent / "data_mapping.tsv", sep="\t")
335-
pre_addi_df = data_mapping.merge(additional_data_df, how="inner", on="data")
336-
addi_df = pd.DataFrame(
337-
[
338-
pre_addi_df["data"][pre_addi_df["dest"] == x].values.tolist()
339-
for x in ("participants", "sessions", "scans")
340-
]
341-
).transpose()
342-
addi_df.columns = ["participants", "sessions", "scans"]
343-
df_to_write = pd.concat([specifications, addi_df])
344-
else:
345-
df_to_write = specifications
415+
if path_to_clinical_txt:
416+
if full:
417+
cprint(
418+
msg=(
419+
"The `-full` flag is being used, "
420+
"using the `-clinical_data_txt/-cdt` option is redundant and will be ignored."
421+
),
422+
lvl="warning",
423+
)
424+
425+
else:
426+
full_specs = pd.read_csv(
427+
Path(__file__).parent / "specifications/full_specs.csv",
428+
sep=";",
429+
)
430+
431+
specifications = _merge_clinical_data_list_into_df(
432+
_load_clinical_data_list(path_to_clinical_txt, full_specs),
433+
full_specs,
434+
specifications.copy(),
435+
)
436+
346437
return {
347-
col: complete_data_df.filter(items=list(df_to_write[col]))
438+
col: complete_data_df.filter(items=list(specifications[col]))
348439
for col in ["participants", "sessions", "scans"]
349440
}
350441

clinica/converters/genfi_to_bids/cli.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@
2121
help="Add all clinical data (mandatory + optional) to sessions.tsv.",
2222
)
2323

24-
clinical_data_tsv = click.option(
24+
clinical_data_txt = click.option(
2525
"-cdt",
26-
"--clinical-data-tsv",
27-
"clinical_data_tsv",
26+
"--clinical-data-txt",
27+
"clinical_data_txt",
2828
type=click.Path(exists=True, file_okay=True, resolve_path=True),
29-
help="Path to a tsv file containing additional clinical data you want to have in the BIDS output.",
29+
help="Path to a txt file containing additional clinical data you want to have in the BIDS output.",
3030
)
3131

3232

@@ -36,12 +36,12 @@
3636
@clinical_data_directory
3737
@gif
3838
@full
39-
@clinical_data_tsv
39+
@clinical_data_txt
4040
def cli(
4141
dataset_directory: PathLike,
4242
bids_directory: PathLike,
4343
clinical_data_directory: Optional[PathLike] = None,
44-
clinical_data_tsv: Optional[PathLike] = None,
44+
clinical_data_txt: Optional[PathLike] = None,
4545
gif: Optional[bool] = False,
4646
full: Optional[bool] = False,
4747
) -> None:
@@ -58,7 +58,7 @@ def cli(
5858
clinical_data_directory,
5959
gif=gif,
6060
full=full,
61-
path_to_clinical_tsv=clinical_data_tsv,
61+
path_to_clinical_txt=clinical_data_txt,
6262
)
6363

6464

0 commit comments

Comments
 (0)