diff --git a/CPAC/utils/bids_utils.py b/CPAC/utils/bids_utils.py index 4ed9e45e7..f137e5830 100755 --- a/CPAC/utils/bids_utils.py +++ b/CPAC/utils/bids_utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2024 C-PAC Developers +# Copyright (C) 2016-2025 C-PAC Developers # This file is part of C-PAC. @@ -324,7 +324,8 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): for k, v in params.items(): if isinstance(v, str): - params[k] = v.encode("ascii", errors="ignore") + # Force all strings to be ASCII-compatible UTF-8 (by removing all non-ASCII characters) + params[k] = v.encode("ascii", errors="ignore").decode("ascii") return params @@ -802,12 +803,10 @@ def bids_gen_cpac_sublist( return sublist -def collect_bids_files_configs(bids_dir, aws_input_creds=""): - """ - :param bids_dir: - :param aws_input_creds: - :return: - """ +def collect_bids_files_configs( + bids_dir: str, aws_input_creds: Optional[str] = "" +) -> tuple[list[str], dict[str, Any]]: + """Collect NIfTI file paths and JSON configurations from a BIDS directory.""" file_paths = [] config_dict = {} @@ -853,7 +852,7 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): except Exception as e: msg = ( f"Error retrieving {s3_obj.key.replace(prefix, '')}" - f" ({e.message})" + f" ({getattr(e, 'message', str(e))})" ) raise SpecifiedBotoCoreError(msg) from e elif "nii" in str(s3_obj.key): @@ -862,7 +861,7 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): ) else: - for root, dirs, files in os.walk(bids_dir, topdown=False, followlinks=True): + for root, _dirs, files in os.walk(bids_dir, topdown=False, followlinks=True): if files: for f in files: for suf in suffixes: @@ -1086,26 +1085,26 @@ def cl_strip_brackets(arg_list): def create_cpac_data_config( - bids_dir, - participant_labels=None, - aws_input_creds=None, - skip_bids_validator=False, - only_one_anat=True, -): + bids_dir: str, + participant_labels: Optional[list[str]] = None, + aws_input_creds: Optional[str] = None, + skip_bids_validator: bool = False, + only_one_anat: bool = True, +) -> list: """ Create a C-PAC data config YAML file from a BIDS directory. Parameters ---------- - bids_dir : str + bids_dir - participant_labels : list or None + participant_labels aws_input_creds - skip_bids_validator : bool + skip_bids_validator - only_one_anat : bool + only_one_anat The "anat" key for a subject expects a string value, but we can temporarily store a list instead by passing True here if we will be filtering that list down to a single string later @@ -1129,8 +1128,10 @@ def create_cpac_data_config( ] if not file_paths: - UTLOGGER.error("Did not find data for %s", ", ".join(participant_labels)) - sys.exit(1) + if participant_labels: + UTLOGGER.error("Did not find data for %s", ", ".join(participant_labels)) + msg = f"Did not find data in {bids_dir}" + raise FileNotFoundError(msg) raise_error = not skip_bids_validator @@ -1145,7 +1146,8 @@ def create_cpac_data_config( if not sub_list: UTLOGGER.error("Did not find data in %s", bids_dir) - sys.exit(1) + msg = f"Did not find data in {bids_dir}" + raise FileNotFoundError(msg) return sub_list diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py index 89416edd1..f1251b422 100644 --- a/CPAC/utils/datasource.py +++ b/CPAC/utils/datasource.py @@ -514,9 +514,14 @@ def match_epi_fmaps( with open(scan_params, "r") as f: scan_params = json.load(f) if "PhaseEncodingDirection" in scan_params: + from CPAC.utils.datatypes import PHASE_ENCODING_DIRECTIONS + epi_pedir: str | bytes = scan_params["PhaseEncodingDirection"] if isinstance(epi_pedir, bytes): epi_pedir = epi_pedir.decode("utf-8") + assert ( + epi_pedir in PHASE_ENCODING_DIRECTIONS + ), f"PhaseEncodingDirection {epi_pedir} not valid (should be one of {PHASE_ENCODING_DIRECTIONS})" if epi_pedir == bold_pedir: same_pe_epi = epi_scan elif epi_pedir[0] == bold_pedir[0]: diff --git a/CPAC/utils/datatypes.py b/CPAC/utils/datatypes.py index 2b29d8741..ffd77b8e1 100644 --- a/CPAC/utils/datatypes.py +++ b/CPAC/utils/datatypes.py @@ -68,3 +68,8 @@ def __init__(self, *args, **kwargs): else: args = ([args[0]],) list.__init__(self, *args, **kwargs) + + +PHASE_ENCODING_DIRECTIONS: list[str] = [ + f"{i}{sign}" for i in ["i", "j", "k"] for sign in ["", "-"] +] diff --git a/CPAC/utils/interfaces/datasink.py b/CPAC/utils/interfaces/datasink.py index c6c77d6c6..fcfbd451b 100644 --- a/CPAC/utils/interfaces/datasink.py +++ b/CPAC/utils/interfaces/datasink.py @@ -30,7 +30,7 @@ # Prior to release 0.12, Nipype was licensed under a BSD license. -# Modifications Copyright (C) 2019-2024 C-PAC Developers +# Modifications Copyright (C) 2019-2025 C-PAC Developers # This file is part of C-PAC. """Interface that allow interaction with data. @@ -139,6 +139,7 @@ def _fetch_bucket(self, bucket_name): # Import packages try: import boto3 + import boto3.session import botocore except ImportError: err_msg = "Boto3 package is not installed - install boto3 and try again." diff --git a/CPAC/utils/tests/test_bids_utils.py b/CPAC/utils/tests/test_bids_utils.py index 2b7267af9..1c371af18 100644 --- a/CPAC/utils/tests/test_bids_utils.py +++ b/CPAC/utils/tests/test_bids_utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021-2024 C-PAC Developers +# Copyright (C) 2021-2025 C-PAC Developers # This file is part of C-PAC. @@ -17,9 +17,15 @@ """Tests for bids_utils.""" from importlib import resources +from itertools import permutations import os +from pathlib import Path from subprocess import run +from warnings import warn +import boto3 +from botocore import UNSIGNED +from botocore.client import Config import pytest import yaml @@ -33,6 +39,7 @@ load_yaml_config, sub_list_filter_by_labels, ) +from CPAC.utils.datatypes import PHASE_ENCODING_DIRECTIONS from CPAC.utils.monitoring.custom_logging import getLogger logger = getLogger("CPAC.utils.tests") @@ -194,3 +201,68 @@ def test_sub_list_filter_by_labels(t1w_label, bold_label, participant_label): ) else: assert all(len(sub.get("func")) in [0, 5] for sub in sub_list) + + +@pytest.mark.parametrize("participant_labels", [["NDARAB348EWR"]]) +def test_scan_parameter_type(tmp_path: Path, participant_labels: list[str]) -> None: + """Test that scan parameter types are correctly interpreted.""" + bids_dir = _gather_scan_parameter_test_data(tmp_path, participant_labels) + data_config = create_cpac_data_config(bids_dir, participant_labels) + assert len(data_config) == 1 + if "fmap" in data_config[0]: + assert ( + data_config[0]["fmap"]["epi_PA"]["scan_parameters"][ + "PhaseEncodingDirection" + ] + in PHASE_ENCODING_DIRECTIONS + ) + + +def _gather_scan_parameter_test_data( + root_dir: Path, participant_labels: list[str] +) -> str: + """Create a test BIDS dataset with structure for the given subject. + + Downloads JSON files from S3 and creates empty placeholder files for imaging data. + """ + s3_bucket = "fcp-indi" + bids_dir = root_dir / "data" + for _participant in participant_labels: + participant = ( + f"sub-{_participant}" + if not _participant.startswith("sub-") + else _participant + ) + s3_prefix = f"data/Projects/HBN/MRI/Site-CBIC/{participant}" + s3_client = boto3.client("s3", config=Config(signature_version=UNSIGNED)) + files = { + "anat": [ + f"{participant}_acq-HCP_run-01_T1w", + ], + "fmap": [ + f"{participant}_dir-{direction}_acq-{acq}_epi" + for direction in [ + "".join(direction) for direction in permutations(["A", "P"], 2) + ] + for acq in ["dwi", "fMRI"] + ], + "func": [ + f"{participant}_task-movieDM_bold", + ], + } + for modality, file_list in files.items(): + modality_dir = bids_dir / participant / modality + modality_dir.mkdir(parents=True, exist_ok=True) + for file_base in file_list: + # Download JSON files from S3 + json_file = modality_dir / f"{file_base}.json" + s3_key = f"{s3_prefix}/{modality}/{file_base}.json" + try: + s3_client.download_file(s3_bucket, s3_key, str(json_file)) + except Exception as e: + # If download fails, create empty JSON + warn("Failed to download %s: %s" % (s3_key, e)) + json_file.write_text("{}") + nii_file = modality_dir / f"{file_base}.nii.gz" + nii_file.touch() + return str(bids_dir)