Skip to content

Commit 1befa05

Browse files
ISMAILI AdamISMAILI Adam
authored andcommitted
test and code corrected after review
1 parent 7002f2b commit 1befa05

File tree

2 files changed

+151
-50
lines changed

2 files changed

+151
-50
lines changed

clinica/converters/genfi_to_bids/_utils.py

Lines changed: 61 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -286,56 +286,91 @@ def _specs_depending_on_option(full: bool, gif: bool) -> str:
286286
return "mandatory_specs"
287287

288288

289-
def _load_clinical_data_list(cdt_path: Path, specs_df: pd.DataFrame) -> List[str]:
289+
def _filter_invalid_data(
290+
data: str, index: int, specs_values: set, specs_df: pd.DataFrame
291+
) -> bool:
292+
"""Filter invalid (empty, unknown and not present in 'sessions') data from the user txt file.
293+
294+
Parameters
295+
----------
296+
data: str
297+
Stripped line from the user txt file.
298+
299+
index: int
300+
Index of the line.
301+
302+
specs_values: Dict
303+
Data set loaded from the specifications.
304+
305+
specs_df: pd.DataFrame
306+
Dataframe loaded from the specifications.
307+
308+
Returns
309+
-------
310+
bool
311+
Filtering result. True if the data line should be skipped. False otherwise.
312+
"""
313+
# Skip empty lines
314+
if not data:
315+
return True
316+
317+
# Skip unknown lines in specs
318+
if data not in specs_values:
319+
log_and_warn(
320+
f"Line {index}: '{data}' not found in specifications. It will be ignored.",
321+
UserWarning,
322+
)
323+
return True
324+
325+
# Skip lines not in 'sessions'
326+
if data not in specs_df["sessions"].values:
327+
return True
328+
329+
return False
330+
331+
332+
def _load_clinical_data_list(cdt_path: Path) -> List[str]:
290333
"""Load the list of clinical data fields selected by the user from a txt file.
291334
292335
Parameters
293336
----------
294337
cdt_path: Path
295338
TXT file containing the data fields the user wishes to have from the excel spreadsheets
296339
297-
specs_df: pd.DataFrame
298-
Dataframe loaded from the specifications
299-
300340
Returns
301341
-------
302342
List[str]
303343
List of selected clinical data fields
304344
"""
305345
clinical_data_list = []
306346

307-
specs_values = {
308-
str(value).strip()
309-
for value in specs_df.to_numpy().ravel()
310-
if pd.notna(value) and str(value).strip() != ""
311-
}
347+
full_specs = pd.read_csv(
348+
Path(__file__).parent / "specifications/full_specs.csv",
349+
sep=";",
350+
)
351+
352+
specs_values = {value for value in full_specs.to_numpy().ravel() if pd.notna(value)}
312353

313354
with open(cdt_path, "r", encoding="utf-8") as f:
314355
for i, line in enumerate(f, start=1):
315356
data = line.strip()
316357

317-
if not data:
318-
continue # Skip empty lines
319-
320-
if data not in specs_values:
321-
log_and_warn(
322-
f"Line {i}: '{data}' not found in specifications. It will be ignored.",
323-
UserWarning,
324-
)
358+
if _filter_invalid_data(data, i, specs_values, full_specs):
325359
continue
326360

327361
clinical_data_list.append(data)
328362

329363
if not clinical_data_list:
330364
log_and_warn(
331-
"'-clinical_data_txt/cdt' is empty (no valid entries found).", UserWarning
365+
f"File for option '-clinical_data_txt/cdt' at location {cdt_path} does not contain any valid entry.",
366+
UserWarning,
332367
)
333368

334369
return clinical_data_list
335370

336371

337372
def _merge_clinical_data_list_into_df(
338-
clinical_data_list: List[str], specs_df: pd.DataFrame, df_to_complete: pd.DataFrame
373+
clinical_data_list: List[str], df_to_complete: pd.DataFrame
339374
) -> pd.DataFrame:
340375
"""Merge clinical data list into the 'sessions' column of a specs like dataframe to complete.
341376
@@ -344,9 +379,6 @@ def _merge_clinical_data_list_into_df(
344379
clinical_data_list: List[str]
345380
List of selected clinical data fields
346381
347-
specs_df: Path
348-
Dataframe loaded from the specifications
349-
350382
df_to_complete: pd.DataFrame
351383
Specs like dataframe to complete
352384
@@ -361,16 +393,15 @@ def _merge_clinical_data_list_into_df(
361393
if value in sessions_values:
362394
continue
363395

364-
if value in specs_df["sessions"].values:
365-
last_valid_idx = df_to_complete["sessions"].last_valid_index()
396+
last_valid_idx = df_to_complete["sessions"].last_valid_index()
366397

367-
next_idx = last_valid_idx + 1
398+
next_idx = last_valid_idx + 1
368399

369-
if next_idx < len(df_to_complete):
370-
df_to_complete.loc[next_idx, "sessions"] = value
400+
if next_idx < len(df_to_complete):
401+
df_to_complete.loc[next_idx, "sessions"] = value
371402

372-
else:
373-
df_to_complete.loc[len(df_to_complete), "sessions"] = value
403+
else:
404+
df_to_complete.loc[len(df_to_complete), "sessions"] = value
374405

375406
return df_to_complete
376407

@@ -426,14 +457,8 @@ def prepare_dataset_to_bids_format(
426457
)
427458

428459
else:
429-
full_specs = pd.read_csv(
430-
Path(__file__).parent / "specifications/full_specs.csv",
431-
sep=";",
432-
)
433-
434460
specifications = _merge_clinical_data_list_into_df(
435-
_load_clinical_data_list(path_to_clinical_txt, full_specs),
436-
full_specs,
461+
_load_clinical_data_list(path_to_clinical_txt),
437462
specifications.copy(),
438463
)
439464

test/unittests/converters/test_genfi_to_bids_utils.py

Lines changed: 90 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -560,16 +560,90 @@ def test_specs_depending_on_option(full, gif, expected):
560560
)
561561

562562

563+
SPECS_VALUES = {value for value in FULL_SPECS_DF.to_numpy().ravel() if pd.notna(value)}
564+
565+
566+
def test_filter_empty_data():
567+
from clinica.converters.genfi_to_bids._utils import _filter_invalid_data
568+
569+
assert _filter_invalid_data("", 0, SPECS_VALUES, FULL_SPECS_DF)
570+
571+
572+
def test_filter_unknown_data():
573+
from clinica.converters.genfi_to_bids._utils import _filter_invalid_data
574+
575+
data = "invalid_data_0"
576+
577+
expected = f"Line {0}: '{data}' not found in specifications. It will be ignored."
578+
579+
with pytest.warns(UserWarning, match=re.escape(expected)):
580+
assert _filter_invalid_data(data, 0, SPECS_VALUES, FULL_SPECS_DF)
581+
582+
583+
@pytest.mark.parametrize(
584+
("data", "index"),
585+
[
586+
(
587+
"source",
588+
0,
589+
),
590+
(
591+
"blinded_code",
592+
1,
593+
),
594+
(
595+
"bids_filename",
596+
2,
597+
),
598+
(
599+
"bids_full_path",
600+
3,
601+
),
602+
],
603+
)
604+
def test_filter_no_in_sessions_data(data, index):
605+
from clinica.converters.genfi_to_bids._utils import _filter_invalid_data
606+
607+
assert _filter_invalid_data(data, index, SPECS_VALUES, FULL_SPECS_DF)
608+
609+
610+
@pytest.mark.parametrize(
611+
("data", "index"),
612+
[
613+
(
614+
"genfi_version",
615+
0,
616+
),
617+
(
618+
"aad",
619+
1,
620+
),
621+
(
622+
"aad_1",
623+
2,
624+
),
625+
(
626+
"aad_2",
627+
3,
628+
),
629+
],
630+
)
631+
def test_filter_valid_data(data, index):
632+
from clinica.converters.genfi_to_bids._utils import _filter_invalid_data
633+
634+
assert not _filter_invalid_data(data, index, SPECS_VALUES, FULL_SPECS_DF)
635+
636+
563637
@pytest.mark.parametrize(
564638
("clinical_data_list", "expected"),
565639
[
566640
(
567641
["blinded_family\n", "aad_1\n", "bids_filename"],
568-
["blinded_family", "aad_1", "bids_filename"],
642+
["aad_1"],
569643
),
570644
(
571645
[" blinded_site \n", "\n", "aad\n", "\tbids_full_path\n"],
572-
["blinded_site", "aad", "bids_full_path"],
646+
["aad"],
573647
), # whitespaces + empty lines
574648
],
575649
)
@@ -579,29 +653,31 @@ def test_load_clinical_data_list_success(tmp_path, clinical_data_list, expected)
579653
cdt_path = tmp_path / "additional_clinical_data.txt"
580654
cdt_path.write_text("".join(clinical_data_list), encoding="utf-8")
581655

582-
out = _load_clinical_data_list(cdt_path, FULL_SPECS_DF)
656+
out = _load_clinical_data_list(cdt_path)
583657

584658
assert out == expected
585659

586660

587661
@pytest.mark.parametrize(
588-
("clinical_data_list", "expected"),
662+
"clinical_data_list",
589663
[
590-
([], "'-clinical_data_txt/cdt' is empty (no valid entries found)."),
591-
(
592-
["\n", " \n", "\t\n"],
593-
"'-clinical_data_txt/cdt' is empty (no valid entries found).",
594-
),
664+
[],
665+
["\n", " \n", "\t\n"],
595666
],
596667
)
597-
def test_load_clinical_data_list_empty(tmp_path, clinical_data_list, expected):
668+
def test_load_clinical_data_list_empty(tmp_path, clinical_data_list):
598669
from clinica.converters.genfi_to_bids._utils import _load_clinical_data_list
599670

600671
cdt_path = tmp_path / "additional_clinical_data.txt"
601672
cdt_path.write_text("".join(clinical_data_list), encoding="utf-8")
602673

674+
expected = (
675+
f"File for option '-clinical_data_txt/cdt' at location {cdt_path} "
676+
"does not contain any valid entry."
677+
)
678+
603679
with pytest.warns(UserWarning, match=re.escape(expected)):
604-
_load_clinical_data_list(cdt_path, FULL_SPECS_DF)
680+
_load_clinical_data_list(cdt_path)
605681

606682

607683
def test_load_clinical_data_list_unknown_field(tmp_path):
@@ -613,7 +689,7 @@ def test_load_clinical_data_list_unknown_field(tmp_path):
613689
expected = "Line 2: 'false_field' not found in specifications."
614690

615691
with pytest.warns(UserWarning, match=re.escape(expected)):
616-
_load_clinical_data_list(cdt_path, FULL_SPECS_DF)
692+
_load_clinical_data_list(cdt_path)
617693

618694

619695
def test_merge_clinical_data_list_into_df_in_sessions():
@@ -624,7 +700,7 @@ def test_merge_clinical_data_list_into_df_in_sessions():
624700
clinical_data_list = ["aad", "aad_1", "aad_2"]
625701

626702
out = _merge_clinical_data_list_into_df(
627-
clinical_data_list, FULL_SPECS_DF, TO_COMPLETE_SPECS_DF.copy()
703+
clinical_data_list, TO_COMPLETE_SPECS_DF.copy()
628704
)
629705

630706
expected = pd.DataFrame(
@@ -667,7 +743,7 @@ def test_merge_clinical_data_list_into_df_no_duplicate():
667743
clinical_data_list = ["participant_id", "session_id"]
668744

669745
out = _merge_clinical_data_list_into_df(
670-
clinical_data_list, FULL_SPECS_DF, TO_COMPLETE_SPECS_DF.copy()
746+
clinical_data_list, TO_COMPLETE_SPECS_DF.copy()
671747
)
672748

673749
expected = pd.DataFrame(

0 commit comments

Comments
 (0)