diff --git a/cubids/cli.py b/cubids/cli.py index 4e634cba..4d43a0b0 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -535,6 +535,20 @@ def _parse_apply(): required=False, ) + parser.add_argument( + "--n-cpus", + "--n_cpus", + type=int, + action="store", + dest="n_cpus", + default=1, + help=( + "Number of CPUs to use for datalad jobs. " + "Used as --jobs for datalad save and datalad run." + ), + required=False, + ) + return parser diff --git a/cubids/cubids.py b/cubids/cubids.py index f2fb0e44..4f0b16aa 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -377,7 +377,7 @@ def add_nifti_info(self, n_cpus=1): print("nothing to save, working tree clean") else: # Use parallel jobs for DataLad save - dl_jobs = n_cpus if n_cpus and n_cpus > 1 else None + dl_jobs = n_cpus if n_cpus and n_cpus > 1 else 1 self.datalad_save(message="Added nifti info to sidecars", jobs=dl_jobs) self.reset_bids_layout() @@ -426,12 +426,12 @@ def add_file_collections(self, n_cpus=1): json.dump(data, f, sort_keys=True, indent=4) if self.use_datalad: - dl_jobs = n_cpus if n_cpus and n_cpus > 1 else None + dl_jobs = n_cpus if n_cpus and n_cpus > 1 else 1 self.datalad_save(message="Added file collection metadata to sidecars", jobs=dl_jobs) self.reset_bids_layout() - def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=True): + def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=True, n_cpus=1): """Apply changes documented in the edited summary tsv and generate the new tsv files. This function looks at the RenameEntitySet and MergeInto @@ -513,6 +513,9 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # orig key/param tuples that will have new entity set to_change = list(entity_sets.keys()) + # Build an index of IntendedFor references once (reused during renames) + intended_for_index = self._build_intendedfor_index() + for row in range(len(files_df)): file_path = self.path + files_df.loc[row, "FilePath"] if Path(file_path).exists() and "/fmap/" not in file_path: @@ -526,7 +529,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T new_entities = utils._entity_set_to_entities(new_key) # generate new filenames according to new entity set - self.change_filename(file_path, new_entities) + self.change_filename(file_path, new_entities, intended_for_index) # create string of mv command ; mv command for dlapi.run for from_file, to_file in zip(self.old_filenames, self.new_filenames): @@ -552,14 +555,29 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T s1 = "Renamed IntendedFor references to " s2 = "Variant Group scans" IF_rename_msg = s1 + s2 - self.datalad_handle.save(message=IF_rename_msg) + dl_jobs = n_cpus if n_cpus and n_cpus > 1 else 1 + self.datalad_save(message=IF_rename_msg, jobs=dl_jobs) s1 = "Renamed Variant Group scans according to their variant " s2 = "parameters" rename_commit = s1 + s2 - self.datalad_handle.run(cmd=["bash", renames], message=rename_commit) + # Use datalad run with --jobs for parallel get/save + dl_jobs = n_cpus if n_cpus and n_cpus > 1 else 1 + subprocess.run( + [ + "datalad", + "run", + "-m", + rename_commit, + "-J", + str(dl_jobs), + "bash", + renames, + ], + cwd=self.path, + ) else: subprocess.run( ["bash", renames], @@ -575,7 +593,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # remove renames file that gets created under the hood subprocess.run(["rm", "-rf", "renames"]) - def change_filename(self, 
filepath, entities): + def change_filename(self, filepath, entities, intended_for_index=None): """Apply changes to a filename based on the renamed entity sets. This function takes into account the new entity set names @@ -605,32 +623,16 @@ def change_filename(self, filepath, entities): suffix = entities["suffix"] - sub = utils.get_entity_value(filepath, "sub") - if self.is_longitudinal: - ses = utils.get_entity_value(filepath, "ses") - # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) self.new_filenames.append(new_path) - # NOW NEED TO RENAME ASSOCIATED FILES - # bids_file = self.layout.get_file(filepath) - bids_file = filepath - # associations = bids_file.get_associations() - associations = self.get_nifti_associations(str(bids_file)) - for assoc_path in associations: - # assoc_path = assoc.path - if Path(assoc_path).exists(): - # print("FILE: ", filepath) - # print("ASSOC: ", assoc.path) - # ensure assoc not an IntendedFor reference - if ".nii" not in str(assoc_path): - self.old_filenames.append(assoc_path) - new_ext_path = utils.img_to_new_ext( - new_path, - "".join(Path(assoc_path).suffixes), - ) - self.new_filenames.append(new_ext_path) + # Deterministically add key associated files without global scans + # Sidecar JSON + sidecar_old = utils.img_to_new_ext(filepath, ".json") + if Path(sidecar_old).exists(): + self.old_filenames.append(sidecar_old) + self.new_filenames.append(utils.img_to_new_ext(new_path, ".json")) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! # Update DWI-specific files @@ -649,7 +651,7 @@ def change_filename(self, filepath, entities): self.new_filenames.append(bvec_new) # Update func-specific files - # now rename _events and _physio files! + # now rename _events, _sbref, and _physio files! old_suffix = parse_file_entities(filepath)["suffix"] scan_end = "_" + old_suffix + old_ext @@ -668,6 +670,21 @@ def change_filename(self, filepath, entities): new_ejson = new_path.replace(new_scan_end, "_events.json") self.new_filenames.append(new_ejson) + # Handle _sbref.nii.gz and _sbref.json files + old_sbref_nii = filepath.replace(scan_end, "_sbref.nii.gz") + if Path(old_sbref_nii).exists(): + self.old_filenames.append(old_sbref_nii) + new_scan_end = "_" + suffix + old_ext + new_sbref_nii = new_path.replace(new_scan_end, "_sbref.nii.gz") + self.new_filenames.append(new_sbref_nii) + + old_sbref_json = filepath.replace(scan_end, "_sbref.json") + if Path(old_sbref_json).exists(): + self.old_filenames.append(old_sbref_json) + new_scan_end = "_" + suffix + old_ext + new_sbref_json = new_path.replace(new_scan_end, "_sbref.json") + self.new_filenames.append(new_sbref_json) + old_physio = filepath.replace(scan_end, "_physio.tsv.gz") if Path(old_physio).exists(): self.old_filenames.append(old_physio) @@ -696,42 +713,51 @@ def change_filename(self, filepath, entities): new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") self.new_filenames.append(new_labeling) - # RENAME INTENDED FORS! 
- if self.is_longitudinal: - ses_path = self.path + "/" + sub + "/" + ses - elif not self.is_longitudinal: - ses_path = self.path + "/" + sub - files_with_if = [] - files_with_if += Path(ses_path).rglob("fmap/*.json") - files_with_if += Path(ses_path).rglob("perf/*_m0scan.json") - for path_with_if in files_with_if: - filename_with_if = str(path_with_if) - self.IF_rename_paths.append(filename_with_if) - # json_file = self.layout.get_file(filename_with_if) - # data = json_file.get_dict() - data = utils.get_sidecar_metadata(filename_with_if) + # RENAME INTENDED FORS using prebuilt index when available + if intended_for_index is None: + intended_for_index = self._build_intendedfor_index() + + old_rel = utils._get_participant_relative_path(filepath) + old_bidsuri = utils._get_bidsuri(filepath, self.path) + new_rel = utils._get_participant_relative_path(new_path) + new_bidsuri = utils._get_bidsuri(new_path, self.path) + + jsons_to_update = set() + for key in (old_rel, old_bidsuri): + if key in intended_for_index: + for jf in intended_for_index[key]: + jsons_to_update.add(jf) + + for jf in jsons_to_update: + self.IF_rename_paths.append(jf) + data = utils.get_sidecar_metadata(jf) if data == "Erroneous sidecar": - print("Error parsing sidecar: ", filename_with_if) + print("Error parsing sidecar: ", jf) continue - - if "IntendedFor" in data.keys(): - # Coerce IntendedFor to a list. - data["IntendedFor"] = listify(data["IntendedFor"]) - for item in data["IntendedFor"]: - if item == utils._get_participant_relative_path(filepath): - # remove old filename - data["IntendedFor"].remove(item) - # add new filename - data["IntendedFor"].append(utils._get_participant_relative_path(new_path)) - - if item == utils._get_bidsuri(filepath, self.path): - # remove old filename - data["IntendedFor"].remove(item) - # add new filename - data["IntendedFor"].append(utils._get_bidsuri(new_path, self.path)) - - # update the json with the new data dictionary - utils._update_json(filename_with_if, data) + if "IntendedFor" not in data: + continue + items = listify(data["IntendedFor"]) or [] + changed = False + # Track which format was present to preserve it + had_rel = old_rel in items + had_bidsuri = old_bidsuri in items + # Remove old references (both styles) + while old_rel in items: + items.remove(old_rel) + changed = True + while old_bidsuri in items: + items.remove(old_bidsuri) + changed = True + # Append new references in the same format(s) that were originally present + if had_rel and new_rel not in items: + items.append(new_rel) + changed = True + if had_bidsuri and new_bidsuri not in items: + items.append(new_bidsuri) + changed = True + if changed: + data["IntendedFor"] = items + utils._update_json(jf, data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -846,30 +872,34 @@ def _purge_associations(self, scans, n_cpus=1): scans : :obj:`list` of :obj:`str` List of file paths to remove from field map JSONs. 
""" - # truncate all paths to intendedfor reference format - # sub, ses, modality only (no self.path) - if_scans = [] - for scan in scans: - if_scans.append(utils._get_participant_relative_path(self.path + scan)) - - for path in Path(self.path).rglob("sub-*/*/fmap/*.json"): - # json_file = self.layout.get_file(str(path)) - # data = json_file.get_dict() - data = utils.get_sidecar_metadata(str(path)) - if data == "Erroneous sidecar": - print("Error parsing sidecar: ", str(path)) - continue - - # remove scan references in the IntendedFor - if "IntendedFor" in data.keys(): - data["IntendedFor"] = listify(data["IntendedFor"]) + # Build index once; remove IntendedFor references only where present + intended_index = self._build_intendedfor_index() - for item in data["IntendedFor"]: - if item in if_scans: - data["IntendedFor"].remove(item) - - # update the json with the new data dictionary - utils._update_json(str(path), data) + for scan in scans: + old_rel = utils._get_participant_relative_path(scan) + old_bidsuri = utils._get_bidsuri(scan, self.path) + jsons_to_update = set() + for key in (old_rel, old_bidsuri): + for jf in intended_index.get(key, []): + jsons_to_update.add(jf) + for jf in jsons_to_update: + data = utils.get_sidecar_metadata(jf) + if data == "Erroneous sidecar": + print("Error parsing sidecar: ", jf) + continue + if "IntendedFor" not in data: + continue + items = listify(data["IntendedFor"]) or [] + changed = False + while old_rel in items: + items.remove(old_rel) + changed = True + while old_bidsuri in items: + items.remove(old_bidsuri) + changed = True + if changed: + data["IntendedFor"] = items + utils._update_json(jf, data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -878,7 +908,7 @@ def _purge_associations(self, scans, n_cpus=1): s1 = "Purged IntendedFor references to files " s2 = "requested for removal" message = s1 + s2 - dl_jobs = n_cpus if n_cpus and n_cpus > 1 else None + dl_jobs = n_cpus if n_cpus and n_cpus > 1 else 1 self.datalad_save(message=message, jobs=dl_jobs) self.reset_bids_layout() @@ -886,34 +916,51 @@ def _purge_associations(self, scans, n_cpus=1): to_remove = [] - for path in Path(self.path).rglob("sub-*/**/*.nii.gz"): - if str(path) in scans: - # bids_file = self.layout.get_file(str(path)) - # associations = bids_file.get_associations() - associations = self.get_nifti_associations(str(path)) - for assoc in associations: - to_remove.append(assoc) - # filepath = assoc.path - - # ensure association is not an IntendedFor reference! 
- if ".nii" not in str(path): - if "/dwi/" in str(path): - # add the bval and bvec if there - if Path(utils.img_to_new_ext(str(path), ".bval")).exists(): - to_remove.append(utils.img_to_new_ext(str(path), ".bval")) - if Path(utils.img_to_new_ext(str(path), ".bvec")).exists(): - to_remove.append(utils.img_to_new_ext(str(path), ".bvec")) - - if "/func/" in str(path): - # add tsvs - tsv = utils.img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") - if Path(tsv).exists(): - to_remove.append(tsv) - # add tsv json (if exists) - if Path(tsv.replace(".tsv", ".json")).exists(): - to_remove.append(tsv.replace(".tsv", ".json")) - - to_remove += scans + for scan in scans: + # Sidecar JSON + sidecar = utils.img_to_new_ext(str(scan), ".json") + if Path(sidecar).exists(): + to_remove.append(sidecar) + + # DWI-specific + if "/dwi/" in str(scan): + bval = utils.img_to_new_ext(str(scan), ".bval") + bvec = utils.img_to_new_ext(str(scan), ".bvec") + if Path(bval).exists(): + to_remove.append(bval) + if Path(bvec).exists(): + to_remove.append(bvec) + + # FUNC-specific + if "/func/" in str(scan): + tsv = utils.img_to_new_ext(str(scan), ".tsv").replace("_bold", "_events") + if Path(tsv).exists(): + to_remove.append(tsv) + tsv_json = tsv.replace(".tsv", ".json") + if Path(tsv_json).exists(): + to_remove.append(tsv_json) + # Handle _sbref.nii.gz and _sbref.json files + old_suffix = parse_file_entities(str(scan))["suffix"] + old_ext = "".join(Path(scan).suffixes) + scan_end = "_" + old_suffix + old_ext + sbref_nii = str(scan).replace(scan_end, "_sbref.nii.gz") + if Path(sbref_nii).exists(): + to_remove.append(sbref_nii) + sbref_json = str(scan).replace(scan_end, "_sbref.json") + if Path(sbref_json).exists(): + to_remove.append(sbref_json) + + # PERF-specific + if "/perf/" in str(scan): + if parse_file_entities(str(scan))["suffix"] == "asl": + context = utils.img_to_new_ext(str(scan), "_aslcontext.tsv") + if Path(context).exists(): + to_remove.append(context) + labeling = utils.img_to_new_ext(str(scan), "_asllabeling.jpg") + if Path(labeling).exists(): + to_remove.append(labeling) + + to_remove += list(scans) # create rm commands for all files that need to be purged purge_commands = [] @@ -1023,6 +1070,42 @@ def _cache_fieldmaps(self): # no intended for found return misfits + def _build_intendedfor_index(self): + """Build an index from IntendedFor entries to JSON files that declare them. + + Returns + ------- + dict + Mapping: IntendedFor entry (str) -> list of JSON file paths that include it. 
+ """ + index = defaultdict(set) + # Fieldmap JSONs + for path in Path(self.path).rglob("sub-*/**/fmap/*.json"): + metadata = utils.get_sidecar_metadata(str(path)) + if metadata == "Erroneous sidecar": + print(f"Warning: Failed to parse sidecar metadata from '{path}'.") + continue + if_list = metadata.get("IntendedFor") + items = listify(if_list) + if items is None: + continue + for item in items: + index[str(item)].add(str(path)) + # ASL M0 JSONs may also include IntendedFor + for path in Path(self.path).rglob("sub-*/**/perf/*_m0scan.json"): + metadata = utils.get_sidecar_metadata(str(path)) + if metadata == "Erroneous sidecar": + print(f"Warning: Failed to parse sidecar metadata from '{path}'.") + continue + if_list = metadata.get("IntendedFor") + items = listify(if_list) + if items is None: + continue + for item in items: + index[str(item)].add(str(path)) + # Convert sets to sorted lists for stable behavior + return {k: sorted(v) for k, v in index.items()} + def get_param_groups_from_entity_set(self, entity_set): """Split entity sets into param groups based on json metadata. @@ -1366,9 +1449,9 @@ def get_tsvs(self, path_prefix): with open(summary_json, "w") as outfile: json.dump(summary_dict, outfile, indent=4) - big_df.to_csv(files_tsv, sep="\t", index=False) + big_df.to_csv(files_tsv, sep="\t", index=False, na_rep="n/a") - summary.to_csv(summary_tsv, sep="\t", index=False) + summary.to_csv(summary_tsv, sep="\t", index=False, na_rep="n/a") # Calculate the acq groups group_by_acquisition_sets(files_tsv, path_prefix, self.acq_group_level) diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 93204da0..9149533d 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -381,7 +381,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_long # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) - acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) + acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False, na_rep="n/a") # Create data dictionary for acq group tsv acq_dict = get_acq_dictionary(is_longitudinal) diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index 991954e7..d5da19e9 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -323,12 +323,12 @@ def test_cubids_apply_intendedfor( # Create a CuBIDS summary tsv summary_tsv = tmpdir / "summary.tsv" df = pd.DataFrame(summary_data) - df.to_csv(summary_tsv, sep="\t", index=False) + df.to_csv(summary_tsv, sep="\t", index=False, na_rep="n/a") # Create a CuBIDS files tsv files_tsv = tmpdir / "files.tsv" df = pd.DataFrame(fdata) - df.to_csv(files_tsv, sep="\t", index=False) + df.to_csv(files_tsv, sep="\t", index=False, na_rep="n/a") # Run cubids apply if isinstance(expected, str): diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 5be2ebe0..789fe9a2 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -459,7 +459,7 @@ def test_tsv_merge_no_datalad(tmp_path): summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" - summary_df.to_csv(valid_tsv_file, sep="\t", index=False) + summary_df.to_csv(valid_tsv_file, sep="\t", index=False, na_rep="n/a") # about to apply merges! 
@@ -472,7 +472,7 @@ def test_tsv_merge_no_datalad(tmp_path): complete_dwi_row ] invalid_tsv_file = tsv_prefix + "_invalid_summary.tsv" - summary_df.to_csv(invalid_tsv_file, sep="\t", index=False) + summary_df.to_csv(invalid_tsv_file, sep="\t", index=False, na_rep="n/a") with pytest.raises(Exception): bod.apply_tsv_changes( @@ -572,7 +572,7 @@ def test_tsv_merge_changes(tmp_path): summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" - summary_df.to_csv(valid_tsv_file, sep="\t", index=False) + summary_df.to_csv(valid_tsv_file, sep="\t", index=False, na_rep="n/a") # about to merge bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) @@ -584,7 +584,7 @@ def test_tsv_merge_changes(tmp_path): complete_dwi_row ] invalid_tsv_file = tsv_prefix + "_invalid_summary.tsv" - summary_df.to_csv(invalid_tsv_file, sep="\t", index=False) + summary_df.to_csv(invalid_tsv_file, sep="\t", index=False, na_rep="n/a") with pytest.raises(Exception): bod.apply_tsv_changes( diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 1a16a37b..ea772bf6 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -108,7 +108,7 @@ def _add_deletion(summary_tsv): """ df = pd.read_table(summary_tsv) df.loc[3, "MergeInto"] = 0 - df.to_csv(summary_tsv, sep="\t", index=False) + df.to_csv(summary_tsv, sep="\t", index=False, na_rep="n/a") return df.loc[3, "KeyParamGroup"] diff --git a/cubids/workflows.py b/cubids/workflows.py index 14a8453c..0c228e71 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -127,34 +127,44 @@ def _link_or_copy(src_path, dst_path): if not os.path.exists(subject_folder_path): os.makedirs(subject_folder_path, exist_ok=True) - # Ensure participants.tsv is available in temp root and is a copy (not a link) - # Always COPY (never link) to avoid modifying the original file when filtering - participants_tsv_path = os.path.join(temporary_bids_dir, "participants.tsv") - # Always remove existing file first in case it was linked in the earlier loop - if os.path.exists(participants_tsv_path): + # Ensure participants.tsv and participants.json are available in temp root + # Always COPY (never link) to avoid modifying the original files when filtering + participants_files = ["participants.tsv", "participants.json"] + for filename in participants_files: + dest_path = os.path.join(temporary_bids_dir, filename) + # Always remove existing file first in case it was linked in the earlier loop + if os.path.exists(dest_path): + try: + os.remove(dest_path) + except Exception as e: + logger.warning( + f"Failed to remove existing file '{dest_path}': {e}. " + "The file may be overwritten or cause conflicts." 
+ ) + # Try to find source file in the provided file list try: - os.remove(participants_tsv_path) - except Exception: # noqa: BLE001 - pass - # Try to find a source participants.tsv in the provided file list - try: - source_participants_tsv_path = None - for candidate_path in files_list: - if os.path.basename(candidate_path) == "participants.tsv": - source_participants_tsv_path = candidate_path - break - # If not in file list, try to get it from the original bids_dir - if not source_participants_tsv_path and bids_dir: - potential_path = os.path.join(bids_dir, "participants.tsv") - if os.path.exists(potential_path): - source_participants_tsv_path = potential_path - if source_participants_tsv_path: - # Always copy (not link) to protect the original file from modification - shutil.copy2(source_participants_tsv_path, participants_tsv_path) - except Exception: # noqa: BLE001 - pass + source_path = None + for candidate_path in files_list: + if os.path.basename(candidate_path) == filename: + source_path = candidate_path + break + # If not in file list, try to get it from the original bids_dir + if not source_path and bids_dir: + potential_path = os.path.join(bids_dir, filename) + if os.path.exists(potential_path): + source_path = potential_path + if source_path: + # Always copy (not link) to protect the original file from modification + shutil.copy2(source_path, dest_path) + except Exception as e: + source_info = source_path if source_path else "unknown location" + logger.warning( + f"Failed to copy '{filename}' from '{source_info}' to '{dest_path}': {e}. " + "The file may be missing or inaccessible." + ) # If participants.tsv exists in the temp BIDS root, filter to current subject + participants_tsv_path = os.path.join(temporary_bids_dir, "participants.tsv") if os.path.exists(participants_tsv_path): try: participants_table = pd.read_csv(participants_tsv_path, sep="\t") @@ -166,10 +176,13 @@ def _link_or_copy(src_path, dst_path): participants_tsv_path, sep="\t", index=False, + na_rep="n/a", ) - except Exception as e: # noqa: F841 - # Non-fatal: continue validation even if filtering fails - pass + except Exception as e: + logger.warning( + f"Failed to filter participants.tsv for subject {subject}: {e}. " + "Continuing validation without filtering." + ) # Run the validator call = build_validator_call( @@ -271,7 +284,7 @@ def validate( else: val_tsv = str(bids_dir) + "/code/CuBIDS/" + str(output_prefix) + "_validation.tsv" - parsed.to_csv(val_tsv, sep="\t", index=False) + parsed.to_csv(val_tsv, sep="\t", index=False, na_rep="n/a") # build validation data dictionary json sidecar val_dict = get_val_dictionary() @@ -363,7 +376,7 @@ def validate( else: val_tsv = str(bids_dir) + "/code/CuBIDS/" + str(output_prefix) + "_validation.tsv" - parsed.to_csv(val_tsv, sep="\t", index=False) + parsed.to_csv(val_tsv, sep="\t", index=False, na_rep="n/a") # build validation data dictionary json sidecar val_dict = get_val_dictionary() @@ -487,6 +500,7 @@ def apply( edited_summary_tsv, files_tsv, new_tsv_prefix, + n_cpus=1, ): """Apply the tsv changes. @@ -525,6 +539,7 @@ def apply( str(files_tsv), str(new_tsv_prefix), raise_on_error=False, + n_cpus=n_cpus, ) diff --git a/docs/example.rst b/docs/example.rst index a8510054..097829be 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -413,6 +413,14 @@ We can execute ``cubids apply`` with the following command: $ cubids apply BIDS_Dataset_DataLad v0_edited_summary.tsv v0_files.tsv v1 --use-datalad +.. 
note::
+    For large datasets, you can speed up DataLad operations by passing the ``--n-cpus`` flag,
+    which runs ``datalad save`` and ``datalad run`` with parallel jobs. For example, to use 4 CPUs:
+
+    .. code-block:: console
+
+        $ cubids apply BIDS_Dataset_DataLad v0_edited_summary.tsv v0_files.tsv v1 --use-datalad --n-cpus 4
+
 Checking our git log, we can see that our changes from apply have been saved.

 .. image:: _static/screenshot_7.png
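
The parallelism behind ``--n-cpus`` amounts to forwarding a jobs count to DataLad, as the
hunks in ``cubids.py`` above do for ``datalad save`` and ``datalad run``. A minimal
standalone sketch of that pattern, assuming a hypothetical helper name
(``datalad_run_with_jobs``) that is not part of CuBIDS:

.. code-block:: python

    import subprocess


    def datalad_run_with_jobs(dataset_path, script_path, message, n_cpus=1):
        """Run a shell script under ``datalad run`` with parallel get/save jobs."""
        # Clamp to at least one job, mirroring the dl_jobs logic in the patch.
        jobs = n_cpus if n_cpus and n_cpus > 1 else 1
        subprocess.run(
            ["datalad", "run", "-m", message, "-J", str(jobs), "bash", script_path],
            cwd=dataset_path,
            check=True,  # the patch omits check=True; added here so failures surface
        )

Only values greater than one change behavior; the default of ``--n-cpus 1`` keeps the
previous single-job DataLad calls.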
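
The rename and purge changes both lean on ``_build_intendedfor_index``: scan the fieldmap
and M0 sidecars once, then look up exactly which JSON files reference a given scan instead
of re-globbing the dataset per file. A rough standalone sketch of that indexing step
(the function name, plain ``json`` loading, and error handling are illustrative, not the
CuBIDS method itself):

.. code-block:: python

    import json
    from collections import defaultdict
    from pathlib import Path


    def collect_intendedfor_index(bids_root):
        """Map each IntendedFor entry to the sidecar JSONs that declare it."""
        index = defaultdict(set)
        patterns = ("sub-*/**/fmap/*.json", "sub-*/**/perf/*_m0scan.json")
        for pattern in patterns:
            for sidecar in Path(bids_root).rglob(pattern):
                try:
                    metadata = json.loads(sidecar.read_text())
                except (OSError, json.JSONDecodeError):
                    continue  # skip unreadable or malformed sidecars
                intended = metadata.get("IntendedFor")
                if intended is None:
                    continue
                if isinstance(intended, str):
                    intended = [intended]  # coerce a single entry to a list
                for entry in intended:
                    index[str(entry)].add(str(sidecar))
        # Sorted lists keep downstream edits deterministic across runs.
        return {entry: sorted(paths) for entry, paths in index.items()}

In ``apply_tsv_changes`` the index is built once and each rename queries it with both the
participant-relative path and the ``bids::`` URI form of the old filename, so only sidecars
that actually reference the renamed file are rewritten.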