Skip to content

Commit 3b8bca0

Browse files
authored
Stop adding leading zero to run entity when renaming files (#382)
* Abstract out build_path for easier testing. * Add doctests. * Improve doctests. * Expect error. * Bring tests in line with expected behavior. * More cleanup before the fix. * Fix the error type in other tests. * Push the fix. * Rename get_key_name to get_entity_value.
1 parent 61fed30 commit 3b8bca0

File tree

3 files changed

+181
-57
lines changed

3 files changed

+181
-57
lines changed

cubids/cubids.py

Lines changed: 173 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -469,55 +469,19 @@ def change_filename(self, filepath, entities):
469469
-----
470470
This is the function I need to spend the most time on, since it has entities hardcoded.
471471
"""
472+
new_path = build_path(
473+
filepath=filepath,
474+
entities=entities,
475+
out_dir=str(self.path),
476+
)
477+
472478
exts = Path(filepath).suffixes
473479
old_ext = "".join(exts)
474480

475481
suffix = entities["suffix"]
476-
entity_file_keys = []
477-
478-
# Entities that may be in the filename?
479-
file_keys = ["task", "acquisition", "direction", "reconstruction", "run"]
480-
481-
for key in file_keys:
482-
if key in list(entities.keys()):
483-
entity_file_keys.append(key)
484482

485-
sub = get_key_name(filepath, "sub")
486-
ses = get_key_name(filepath, "ses")
487-
sub_ses = sub + "_" + ses
488-
489-
if "run" in list(entities.keys()) and "run-0" in filepath:
490-
# XXX: This adds an extra leading zero to run.
491-
entities["run"] = "0" + str(entities["run"])
492-
493-
filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys])
494-
filename = (
495-
filename.replace("acquisition", "acq")
496-
.replace("direction", "dir")
497-
.replace("reconstruction", "rec")
498-
)
499-
if len(filename) > 0:
500-
filename = sub_ses + "_" + filename + "_" + suffix + old_ext
501-
else:
502-
raise ValueError(f"Could not construct new filename for {filepath}")
503-
504-
# CHECK TO SEE IF DATATYPE CHANGED
505-
# datatype may be overridden/changed if the original file is located in the wrong folder.
506-
dtypes = ["anat", "func", "perf", "fmap", "dwi"]
507-
dtype_orig = ""
508-
for dtype in dtypes:
509-
if dtype in filepath:
510-
dtype_orig = dtype
511-
512-
if "datatype" in entities.keys():
513-
dtype_new = entities["datatype"]
514-
if entities["datatype"] != dtype_orig:
515-
print("WARNING: DATATYPE CHANGE DETECETD")
516-
else:
517-
dtype_new = dtype_orig
518-
519-
# Construct the new filename
520-
new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename
483+
sub = get_entity_value(filepath, "sub")
484+
ses = get_entity_value(filepath, "ses")
521485

522486
# Add the scan path + new path to the lists of old, new filenames
523487
self.old_filenames.append(filepath)
@@ -1762,9 +1726,173 @@ def img_to_new_ext(img_path, new_ext):
17621726
return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext
17631727

17641728

1765-
def get_key_name(path, key):
1729+
def get_entity_value(path, key):
17661730
"""Given a filepath and BIDS key name, return value."""
17671731
parts = Path(path).parts
17681732
for part in parts:
17691733
if part.startswith(key + "-"):
17701734
return part
1735+
1736+
1737+
def build_path(filepath, entities, out_dir):
1738+
"""Build a new path for a file based on its BIDS entities.
1739+
1740+
Parameters
1741+
----------
1742+
filepath : str
1743+
The original file path.
1744+
entities : dict
1745+
A dictionary of BIDS entities.
1746+
This should include all of the entities in the filename *except* for subject and session.
1747+
out_dir : str
1748+
The output directory for the new file.
1749+
1750+
Returns
1751+
-------
1752+
new_path : str
1753+
The new file path.
1754+
1755+
Examples
1756+
--------
1757+
>>> build_path(
1758+
... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz",
1759+
... {"acquisition": "VAR", "suffix": "T2w"},
1760+
... "/output",
1761+
... )
1762+
'/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz'
1763+
1764+
The function does not add an extra leading zero to the run entity when it's a string.
1765+
>>> build_path(
1766+
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
1767+
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
1768+
... "/output",
1769+
... )
1770+
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
1771+
1772+
The function adds an extra leading zero to the run entity when it's an integer
1773+
and the original filename has a leading zero.
1774+
>>> build_path(
1775+
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz",
1776+
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
1777+
... "/output",
1778+
... )
1779+
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz'
1780+
1781+
The function does not add an extra leading zero to the run entity when it's an integer
1782+
and the original filename doesn't have a leading zero.
1783+
>>> build_path(
1784+
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
1785+
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
1786+
... "/output",
1787+
... )
1788+
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
1789+
1790+
The function doesn't add an extra leading zero to the run entity when there isn't a zero.
1791+
>>> build_path(
1792+
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
1793+
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
1794+
... "/output",
1795+
... )
1796+
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
1797+
1798+
Entities in the original path, but not the entity dictionary, are not included,
1799+
like run in this case.
1800+
>>> build_path(
1801+
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
1802+
... {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
1803+
... "/output",
1804+
... )
1805+
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
1806+
1807+
Entities outside of the prescribed list are ignored, such as "subject"...
1808+
>>> build_path(
1809+
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
1810+
... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"},
1811+
... "/output",
1812+
... )
1813+
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
1814+
1815+
or "echo".
1816+
>>> build_path(
1817+
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
1818+
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
1819+
... "/output",
1820+
... )
1821+
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
1822+
1823+
It can change the datatype, but will warn the user.
1824+
>>> build_path(
1825+
... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz",
1826+
... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"},
1827+
... "/output",
1828+
... )
1829+
WARNING: DATATYPE CHANGE DETECTED
1830+
'/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz'
1831+
1832+
It expects a longitudinal structure, so providing a cross-sectional filename won't work.
1833+
XXX: This is a bug.
1834+
>>> build_path(
1835+
... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz",
1836+
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
1837+
... "/output",
1838+
... )
1839+
Traceback (most recent call last):
1840+
ValueError: Could not extract subject or session from ...
1841+
"""
1842+
exts = Path(filepath).suffixes
1843+
old_ext = "".join(exts)
1844+
1845+
suffix = entities["suffix"]
1846+
entity_file_keys = []
1847+
1848+
# Entities that may be in the filename?
1849+
file_keys = ["task", "acquisition", "direction", "reconstruction", "run"]
1850+
1851+
for key in file_keys:
1852+
if key in list(entities.keys()):
1853+
entity_file_keys.append(key)
1854+
1855+
sub = get_entity_value(filepath, "sub")
1856+
ses = get_entity_value(filepath, "ses")
1857+
if sub is None or ses is None:
1858+
raise ValueError(f"Could not extract subject or session from {filepath}")
1859+
1860+
# Add leading zeros to run entity if it's an integer.
1861+
# If it's a string, respect the value provided.
1862+
if "run" in entities.keys() and isinstance(entities["run"], int):
1863+
# Infer the number of leading zeros needed from the original filename
1864+
n_leading = 2 # default to 1 leading zero
1865+
if "_run-" in filepath:
1866+
run_str = filepath.split("_run-")[1].split("_")[0]
1867+
n_leading = len(run_str)
1868+
entities["run"] = str(entities["run"]).zfill(n_leading)
1869+
1870+
filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys])
1871+
filename = (
1872+
filename.replace("acquisition", "acq")
1873+
.replace("direction", "dir")
1874+
.replace("reconstruction", "rec")
1875+
)
1876+
if len(filename) > 0:
1877+
filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
1878+
else:
1879+
raise ValueError(f"Could not construct new filename for {filepath}")
1880+
1881+
# CHECK TO SEE IF DATATYPE CHANGED
1882+
# datatype may be overridden/changed if the original file is located in the wrong folder.
1883+
dtypes = ["anat", "func", "perf", "fmap", "dwi"]
1884+
dtype_orig = ""
1885+
for dtype in dtypes:
1886+
if dtype in filepath:
1887+
dtype_orig = dtype
1888+
1889+
if "datatype" in entities.keys():
1890+
dtype_new = entities["datatype"]
1891+
if entities["datatype"] != dtype_orig:
1892+
print("WARNING: DATATYPE CHANGE DETECTED")
1893+
else:
1894+
dtype_new = dtype_orig
1895+
1896+
# Construct the new filename
1897+
new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
1898+
return new_path

cubids/tests/test_apply.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -230,32 +230,28 @@ def summary_data():
230230
(
231231
"relpath_long",
232232
relpath_intendedfor_long,
233-
# XXX: Should not have extra leading zero in run entity, but that's a known bug.
234-
"ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-001_dwi.nii.gz",
233+
"ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz",
235234
"pass",
236235
),
237236
(
238237
"bidsuri_long",
239238
bidsuri_intendedfor_long,
240-
# XXX: Should not have extra leading zero in run entity, but that's a known bug.
241-
"bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-001_dwi.nii.gz",
239+
"bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz",
242240
"pass",
243241
),
244242
(
245243
"relpath_cs",
246244
relpath_intendedfor_cs,
247-
# XXX: Should not have extra leading zero in run entity, but that's a known bug.
248245
# XXX: CuBIDS enforces longitudinal dataset, so this fails.
249-
"dwi/sub-01_acq-VAR_dir-AP_run-001_dwi.nii.gz",
250-
TypeError,
246+
"dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz",
247+
ValueError,
251248
),
252249
(
253250
"bidsuri_cs",
254251
bidsuri_intendedfor_cs,
255-
# XXX: Should not have extra leading zero in run entity, but that's a known bug.
256252
# XXX: CuBIDS enforces longitudinal dataset, so this fails.
257-
"bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-001_dwi.nii.gz",
258-
TypeError,
253+
"bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz",
254+
ValueError,
259255
),
260256
],
261257
)

cubids/tests/test_cubids.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,8 +251,8 @@ def _test_img_to_new_ext(cubids_instance):
251251
# Add assertions here
252252

253253

254-
def _test_get_key_name(cubids_instance):
254+
def _test_get_entity_value(cubids_instance):
255255
path = "/path/to/file.nii.gz"
256256
key = "subject"
257-
key_name = cubids_instance.get_key_name(path, key)
257+
key_name = cubids_instance.get_entity_value(path, key)
258258
# Add assertions here

0 commit comments

Comments
 (0)