diff --git a/data/participants.tsv b/data/participants.tsv index 8fd9e09..42e9d14 100644 --- a/data/participants.tsv +++ b/data/participants.tsv @@ -1,2 +1,3 @@ participant_id height weight age gender sub-01 178 58 28 male +sub-02 178 58 28 male diff --git a/data/sub-02/ses-baseline/anat/sub-02_ses-baseline_T1w.json b/data/sub-02/ses-baseline/anat/sub-02_ses-baseline_T1w.json new file mode 100644 index 0000000..ce19c70 --- /dev/null +++ b/data/sub-02/ses-baseline/anat/sub-02_ses-baseline_T1w.json @@ -0,0 +1,69 @@ +{ + "Modality": "MR", + "MagneticFieldStrength": 3, + "ImagingFrequency": 123.254, + "Manufacturer": "Siemens", + "ManufacturersModelName": "Prisma_fit", + "InstitutionName": "Rigshospitalet", + "InstitutionalDepartmentName": "Rontgen", + "InstitutionAddress": "Blegdamsvej 9,Kobenhavn,District,DK,DK-2100", + "DeviceSerialNumber": "67006", + "StationName": "MN213418", + "BodyPartExamined": "HEAD", + "PatientPosition": "HFS", + "ProcedureStepDescription": "MR cerebrum", + "SoftwareVersions": "syngo MR D13D", + "MRAcquisitionType": "3D", + "SeriesDescription": "t1_mpr_3d_sag_p2_iso_Siemens", + "ProtocolName": "t1_mpr_3d_sag_p2_iso_Siemens", + "ScanningSequence": "GR\\IR", + "SequenceVariant": "SK\\SP\\MP", + "ScanOptions": "IR", + "SequenceName": "*tfl3d1rs16ns", + "ImageType": ["ORIGINAL", "PRIMARY", "M", "NORM", "DIS3D", "DIS2D"], + "SeriesNumber": 3, + "AcquisitionTime": "15:31:52.362500", + "AcquisitionNumber": 1, + "SliceThickness": 1, + "SAR": 0.0754335, + "EchoTime": 0.00252, + "RepetitionTime": 1.9, + "InversionTime": 0.9, + "FlipAngle": 9, + "PartialFourier": 1, + "BaseResolution": 256, + "ShimSetting": [ + -5020, + -11544, + -9996, + 104, + 27, + -15, + -3, + -8 ], + "TxRefAmp": 261.958, + "PhaseResolution": 1, + "ReceiveCoilName": "HeadNeck_64", + "ReceiveCoilActiveElements": "HC1-7;NC1,2", + "PulseSequenceDetails": "%SiemensSeq%\\tfl", + "RefLinesPE": 24, + "ConsistencyInfo": "N4_VD13D_LATEST_20130810", + "PercentPhaseFOV": 100, + 
"PercentSampling": 100, + "PhaseEncodingSteps": 255, + "AcquisitionMatrixPE": 256, + "ReconMatrixPE": 256, + "ParallelReductionFactorInPlane": 2, + "PixelBandwidth": 170, + "DwellTime": 1.15e-05, + "ImageOrientationPatientDICOM": [ + 0.050593, + 0.998719, + 5.87093e-08, + -0.0348548, + 0.00176573, + -0.999391 ], + "InPlanePhaseEncodingDirectionDICOM": "ROW", + "ConversionSoftware": "dcm2niix", + "ConversionSoftwareVersion": "v1.0.20210317" +} diff --git a/data/sub-02/ses-baseline/anat/sub-02_ses-baseline_T1w.nii b/data/sub-02/ses-baseline/anat/sub-02_ses-baseline_T1w.nii new file mode 100644 index 0000000..34cf6ed Binary files /dev/null and b/data/sub-02/ses-baseline/anat/sub-02_ses-baseline_T1w.nii differ diff --git a/data/sub-02/ses-baseline/pet/sub-02_ses-baseline_pet.json b/data/sub-02/ses-baseline/pet/sub-02_ses-baseline_pet.json new file mode 100644 index 0000000..c69fbb7 --- /dev/null +++ b/data/sub-02/ses-baseline/pet/sub-02_ses-baseline_pet.json @@ -0,0 +1,235 @@ +{ + "Manufacturer": "Siemens", + "ManufacturersModelName": "HR+", + "Units": "Bq/mL", + "BodyPart": "Brain", + "TracerName": "DASB", + "TracerRadionuclide": "C11", + "TracerMolecularWeight": 282.39, + "TracerMolecularWeightUnits": "g/mol", + "InjectedRadioactivity": 629.74, + "InjectedRadioactivityUnits": "MBq", + "MolarActivity": 55.5, + "MolarActivityUnits": "MBq/nmol", + "SpecificRadioactivity": 196.53670455752683, + "SpecificRadioactivityUnits": "MBq/ug", + "Purity": 99, + "ModeOfAdministration": "bolus", + "InjectedMass": 3.2041852, + "InjectedMassUnits": "ug", + "AcquisitionMode": "list mode", + "ImageDecayCorrected": true, + "ImageDecayCorrectionTime": 0, + "TimeZero": "17:28:40", + "ScanStart": 0, + "InjectionStart": 0, + "FrameDuration": [ + 20, + 20, + 20, + 60, + 60, + 60, + 120, + 120, + 120, + 300, + 300.066, + 600, + 600, + 600, + 600, + 600, + 600, + 600, + 600, + 600, + 600 + ], + "FrameTimesStart": [ + 0, + 20, + 40, + 60, + 120, + 180, + 240, + 360, + 480, + 600, + 900, + 
1200.066, + 1800.066, + 2400.066, + 3000.066, + 3600.066, + 4200.066, + 4800.066, + 5400.066, + 6000.066, + 6600.066 + ], + "ReconMethodParameterLabels": [ + "lower_threshold", + "upper_threshold", + "recon_zoom" + ], + "ReconMethodParameterUnits": [ + "keV", + "keV", + "none" + ], + "ReconMethodParameterValues": [ + 0, + 650, + 3 + ], + "ScaleFactor": [ + 8.548972374455843e-08, + 1.7544691388593492e-07, + 1.3221580275057931e-07, + 1.2703590357432404e-07, + 1.1155360368775291e-07, + 2.2050951997698576e-07, + 2.184752503353593e-07, + 1.7056818535365892e-07, + 1.6606901453997125e-07, + 1.5532630470715958e-07, + 2.19175134930083e-07, + 2.0248222654117853e-07, + 2.277063231304055e-07, + 2.425933018912474e-07, + 2.3802238047210267e-07, + 2.514642005735368e-07, + 2.802861729378492e-07, + 2.797820570776821e-07, + 3.5299004252919985e-07, + 4.6313422785715375e-07, + 4.904185857412813e-07 + ], + "ScatterFraction": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "DecayCorrectionFactor": [ + 1.0056782960891724, + 1.0171427726745605, + 1.0287377834320068, + 1.0522810220718384, + 1.0886797904968262, + 1.1263376474380493, + 1.1851094961166382, + 1.2685142755508423, + 1.3577889204025269, + 1.5278561115264893, + 1.811025857925415, + 2.328737735748291, + 3.271937131881714, + 4.597157001495361, + 6.459125518798828, + 9.075239181518555, + 12.750947952270508, + 17.915414810180664, + 25.1716251373291, + 35.36678695678711, + 49.69125747680664 + ], + "PromptRate": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "RandomRate": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "SinglesRate": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "ReconMethodName": "Vendor", + "ReconFilterType": [ + "Shepp 0.5", + "All-pass 0.4" + ], + 
"ReconFilterSize": [ + 2.5, + 2 + ], + "AttenuationCorrection": "transmission scan" +} diff --git a/data/sub-02/ses-baseline/pet/sub-02_ses-baseline_pet.nii.gz b/data/sub-02/ses-baseline/pet/sub-02_ses-baseline_pet.nii.gz new file mode 100644 index 0000000..b4154ae Binary files /dev/null and b/data/sub-02/ses-baseline/pet/sub-02_ses-baseline_pet.nii.gz differ diff --git a/petdeface/petdeface.py b/petdeface/petdeface.py index 303ef9f..bbc3266 100755 --- a/petdeface/petdeface.py +++ b/petdeface/petdeface.py @@ -571,7 +571,13 @@ def write_out_dataset_description_json(input_bids_dir, output_bids_dir=None): def wrap_up_defacing( - path_to_dataset, output_dir=None, placement="adjacent", remove_existing=True, participant_label_exclude=[], session_label_exclude=[] + path_to_dataset, + output_dir=None, + placement="adjacent", + remove_existing=True, + participant_label_exclude=[], + session_label_exclude=[], + indexer=None, ): """ This function maps the output of this pipeline to the original dataset and depending on the @@ -609,17 +615,21 @@ def wrap_up_defacing( :type participant_label_exclude: list, optional :param session_label_exclude: Excludes set of sessions from the finalized output :type session_label_exclude: list, optional + :param indexer: Pre-built BIDSLayoutIndexer with exclusion patterns, defaults to None + :type indexer: BIDSLayoutIndexer, optional :raises ValueError: _description_ """ - subjects_to_exclude = [f"sub-{sub}/*" for sub in participant_label_exclude] - sessions_to_exclude = [f"*ses-{ses}/*" for ses in session_label_exclude] - exclude_total = subjects_to_exclude + sessions_to_exclude - - # build an indexer - exclude = BIDSLayoutIndexer(ignore=exclude_total) - - # get bids layout of dataset - layout = BIDSLayout(path_to_dataset, derivatives=True, validate=False, indexer=exclude) + # Use provided indexer or create one from exclude lists (fallback for compatibility) + if indexer is None: + subjects_to_exclude = [f"sub-{sub}/*" for sub in 
participant_label_exclude] + sessions_to_exclude = [f"*ses-{ses}/*" for ses in session_label_exclude] + exclude_total = subjects_to_exclude + sessions_to_exclude + indexer = BIDSLayoutIndexer(ignore=exclude_total) + + # get bids layout of dataset using the indexer + layout = BIDSLayout( + path_to_dataset, derivatives=True, validate=False, indexer=indexer + ) # collect defaced images try: @@ -632,10 +642,9 @@ def wrap_up_defacing( sys.exit(1) # collect all original images and jsons - raw_only = BIDSLayout(path_to_dataset, derivatives=False, indexer=exclude) - - raw_images_only = raw_only.get( - suffix=["pet", "T1w"]) + raw_only = BIDSLayout(path_to_dataset, derivatives=False, indexer=indexer) + + raw_images_only = raw_only.get(suffix=["pet", "T1w"]) # if output_dir is not None and is not the same as the input dir we want to clear it out if output_dir is not None and output_dir != path_to_dataset and remove_existing: @@ -656,8 +665,27 @@ def wrap_up_defacing( else: final_destination = output_dir - # copy original dataset to new location + # copy original dataset to new location, respecting exclusions for entry in raw_only.files: + # Check if this file belongs to an excluded subject + should_exclude = False + for excluded_subject in participant_label_exclude: + # Handle both cases: excluded_subject with or without 'sub-' prefix + if excluded_subject.startswith('sub-'): + subject_pattern = f"/{excluded_subject}/" + subject_pattern_underscore = f"/{excluded_subject}_" + else: + subject_pattern = f"/sub-{excluded_subject}/" + subject_pattern_underscore = f"/sub-{excluded_subject}_" + + if subject_pattern in entry or subject_pattern_underscore in entry: + should_exclude = True + break + + # Skip excluded subject files, but copy everything else (including dataset-level files) + if should_exclude: + continue + copy_path = entry.replace(str(path_to_dataset), str(final_destination)) pathlib.Path(copy_path).parent.mkdir( parents=True, exist_ok=True, mode=0o775 @@ -810,6 +838,9 
@@ def __init__(
         self.session_label = session_label
         self.session_label_exclude = session_label_exclude
 
+        # Build comprehensive exclusion indexer considering both include and exclude parameters
+        self.exclude_indexer = self._build_exclusion_indexer()
+
         # check if freesurfer license is valid
         self.fs_license = check_valid_fs_license()
         if not self.fs_license:
@@ -821,6 +852,64 @@ def __init__(
                 f"Using freesurfer license at {self.fs_license} found in system env at $FREESURFER_LICENSE"
             )
 
+    def _build_exclusion_indexer(self):
+        """
+        Build a BIDSLayoutIndexer that excludes subjects and sessions based on:
+        1. Explicit exclusion lists (participant_label_exclude, session_label_exclude)
+        2. Implicit exclusions from include-only lists (participant_label, session_label)
+
+        Labels are accepted with or without their 'sub-'/'ses-' prefix, so that
+        participant_label_exclude=["sub-02"] and ["02"] build the same ignore pattern.
+
+        Returns:
+            BIDSLayoutIndexer: Indexer configured to ignore excluded subjects/sessions
+        """
+
+        def _bare_labels(labels, prefix):
+            # Accept None, a single string or a list; strip only a leading prefix
+            if not labels:
+                return set()
+            if isinstance(labels, str):
+                labels = [labels]
+            return {
+                label[len(prefix):] if label.startswith(prefix) else label
+                for label in labels
+            }
+
+        # Create a temporary layout to get all available subjects and sessions
+        temp_layout = BIDSLayout(self.bids_dir, derivatives=False, validate=False)
+
+        # Start with explicitly excluded subjects
+        excluded_subjects = _bare_labels(self.participant_label_exclude, "sub-")
+
+        # If specific subjects are requested (include-only), exclude all others
+        included_subjects = _bare_labels(self.subject, "sub-")
+        if included_subjects:
+            excluded_subjects.update(
+                sub
+                for sub in temp_layout.get_subjects()
+                if sub not in included_subjects
+            )
+
+        # Start with explicitly excluded sessions
+        excluded_sessions = _bare_labels(self.session_label_exclude, "ses-")
+
+        # If specific sessions are requested (include-only), exclude all others
+        included_sessions = _bare_labels(self.session_label, "ses-")
+        if included_sessions:
+            excluded_sessions.update(
+                ses
+                for ses in temp_layout.get_sessions()
+                if ses not in included_sessions
+            )
+
+        # Convert to ignore patterns for BIDSLayoutIndexer (sorted for a stable order)
+        ignore_patterns = [f"sub-{sub}/*" for sub in sorted(excluded_subjects)]
+        ignore_patterns += [f"*/ses-{ses}/*" for ses in sorted(excluded_sessions)]
+
+        # Build and return the indexer
+        return BIDSLayoutIndexer(ignore=ignore_patterns)
+
     def run(self):
         """
         Runs the defacing workflow given inputs from instiantiation and wraps up defacing by collecting output
@@ -849,7 +938,8 @@ def run(self):
                 placement=self.placement,
                 remove_existing=self.remove_existing,
                 participant_label_exclude=self.participant_label_exclude,
-                session_label_exclude=self.session_label_exclude
+                session_label_exclude=self.session_label_exclude,
+                indexer=self.exclude_indexer,
             )
 
 
diff --git a/tests/test_dir_layouts.py b/tests/test_dir_layouts.py
index 60db604..08e7c53 100644
--- a/tests/test_dir_layouts.py
+++ b/tests/test_dir_layouts.py
@@ -124,6 +124,60 @@ def test_anat_in_subject_folder():
         )
         petdeface.run()
 
+
+def test_participant_exclusion():
+    """Test that participant exclusion works correctly by excluding sub-02"""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        test_dir = Path(temp_dir)
+
+        # Create the test directory and copy our data
+        shutil.copytree(data_dir, test_dir / "participant_exclusion")
+
+        # run petdeface on the copied dataset, excluding sub-02
+        petdeface = PetDeface(
+            test_dir / "participant_exclusion",
+            n_procs=nthreads,
+            preview_pics=False,
+            placement="adjacent",
+            participant_label_exclude=["sub-02"],  # Exclude sub-02
+        )
+        petdeface.run()
+
+        # Check the final defaced dataset directory
+        final_defaced_dir = test_dir / "participant_exclusion_defaced"
+
+        # Count files in the final defaced dataset
+        all_files = list(final_defaced_dir.rglob("*"))
+        all_files = [f for f in all_files if f.is_file()]  # Only files, not directories
+
+        # Count files by subject
+        sub01_files = [f for f in all_files if "sub-01" in str(f)]
+        sub02_files = [f for f in all_files if "sub-02" in str(f)]
+
+        print(f"Total files in defaced dataset: {len(all_files)}")
+        print(f"sub-01 files: {len(sub01_files)}")
+        print(f"sub-02 files: {len(sub02_files)}")
+
+        # Verify that sub-02 does NOT appear anywhere in the final defaced dataset
+        assert len(sub02_files) == 0, f"sub-02 should be completely excluded from final defaced dataset, but found {len(sub02_files)} files: {[str(f) for f in sub02_files]}"
+
+        # Verify that sub-01 exists and was processed
+        assert len(sub01_files) > 0, "sub-01 should exist in final defaced dataset"
+        assert (final_defaced_dir / "sub-01").exists(), "sub-01 directory should exist in final defaced dataset"
+
+        # Verify processing artifacts exist for sub-01 in derivatives
+        derivatives_dir = final_defaced_dir / "derivatives" / "petdeface"
+        if derivatives_dir.exists():
+            sub01_defacemasks = list(derivatives_dir.glob("**/sub-01*defacemask*"))
+            sub01_lta_files = list(derivatives_dir.glob("**/sub-01*.lta"))
+
+            print(f"sub-01 defacemasks found: {len(sub01_defacemasks)}")
+            print(f"sub-01 lta files found: {len(sub01_lta_files)}")
+
+            assert len(sub01_defacemasks) > 0, "sub-01 should have been processed and have defacemasks"
+            assert len(sub01_lta_files) > 0, "sub-01 should have been processed and have lta registration files"
+
+
 def test_no_anat():
     # create a temporary directory to copy the existing dataset into
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -136,17 +190,15 @@ def test_no_anat():
         # run petdeface on the copied dataset
         petdeface = PetDeface(
             Path(tmpdir) / "no_anat",
-            output_dir=Path(tmpdir)
-            / "no_anat_defaced"
-            / "derivatives"
-            / "petdeface",
+            output_dir=Path(tmpdir) / "no_anat_defaced" / "derivatives" / 
"petdeface", n_procs=nthreads, ) - + # now we want to assert that this pipeline crashes and print the error with pytest.raises(FileNotFoundError) as exc_info: petdeface.run() + def test_invalid_bids(): with tempfile.TemporaryDirectory() as tmpdir: shutil.copytree(data_dir, Path(tmpdir) / "invalid") @@ -155,19 +207,17 @@ def test_invalid_bids(): pet_folder = subject_folder / "ses-baseline" / "pet" for file in pet_folder.glob("sub-01_*"): shutil.move( - file, - pet_folder / file.name.replace("sub-01", "sub-01-bestsubject") + file, pet_folder / file.name.replace("sub-01", "sub-01-bestsubject") ) - + # run petdeface on the invalid dataset petdeface = PetDeface( Path(tmpdir) / "invalid", output_dir=Path(tmpdir) / "invalid_defaced" / "derivatives" / "petdeface", n_procs=nthreads, ) - + # Run it and see what error gets raised with pytest.raises(InvalidBIDSDataset) as exc_info: petdeface.run() assert "Dataset at" in str(exc_info.value) - \ No newline at end of file