Skip to content

Commit 445ed68

Browse files
committed
Check for valid DICOM to avoid crashes + performance optimization
1 parent ab972e0 commit 445ed68

File tree

2 files changed

+17
-10
lines changed

2 files changed

+17
-10
lines changed

bidscoin/utilities/__init__.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ def unpack(sesfolder: Path, wildcard: str='', workfolder: Path='', _subprefix: U
8282
return {sesfolder}, False
8383

8484

85+
# Profiling shows dcmread is currently the most expensive function, therefore the (primitive but effective) _DICOMDICT_CACHE optimization
86+
_DICOMDICT_CACHE = _DICOMFILE_CACHE = None
8587
@lru_cache(maxsize=65536)
8688
def is_dicomfile(file: Path) -> bool:
8789
"""
@@ -93,19 +95,26 @@ def is_dicomfile(file: Path) -> bool:
9395
:return: Returns true if a file is a DICOM-file
9496
"""
9597

98+
global _DICOMDICT_CACHE, _DICOMFILE_CACHE
99+
96100
if file.is_file():
97101

98102
# Perform a quick test first
99-
with file.open('rb') as dicomfile:
100-
dicomfile.seek(0x80, 1)
101-
if dicomfile.read(4) == b'DICM':
103+
with file.open('rb') as fid:
104+
fid.seek(0x80, 1)
105+
if fid.read(4) == b'DICM':
102106
return True
103107

104108
# Perform a proof of the pudding test (but avoid memory problems when reading a very large (e.g. EEG) source file)
105109
if file.suffix.lower() in ('.ima','.dcm','.dicm','.dicom',''):
106110
if file.name == 'DICOMDIR':
107111
return True
108-
dicomdata = dcmread(file, force=True) # The DICM tag may be missing for anonymized DICOM files. NB: Force=False raises an error for non-DICOM files
112+
if file != _DICOMFILE_CACHE:
113+
dicomdata = dcmread(file, force=True) # The DICM tag may be missing for anonymized DICOM files. NB: Force=False raises an error for non-DICOM files
114+
_DICOMDICT_CACHE = dicomdata
115+
_DICOMFILE_CACHE = file
116+
else:
117+
dicomdata = _DICOMDICT_CACHE
109118
return 'Modality' in dicomdata
110119

111120
return False
@@ -208,8 +217,6 @@ def parse_x_protocol(pattern: str, dicomfile: Path) -> str:
208217
return ''
209218

210219

211-
# Profiling shows this is currently the most expensive function, therefore the (primitive but effective) _DICOMDICT_CACHE optimization
212-
_DICOMDICT_CACHE = _DICOMFILE_CACHE = None
213220
@lru_cache(maxsize=65536)
214221
def get_dicomfield(tagname: str, dicomfile: Path) -> Union[str, int]:
215222
"""

bidscoin/utilities/dicomsort.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import sys
1313
sys.path.append(str(Path(__file__).parents[2]))
1414
from bidscoin import bcoin, lsdirs, trackusage
15-
from bidscoin.utilities import get_dicomfield
15+
from bidscoin.utilities import get_dicomfield, is_dicomfile
1616

1717
LOGGER = logging.getLogger(__name__)
1818

@@ -37,7 +37,7 @@ def construct_name(scheme: str, dicomfile: Path, force: bool) -> str:
3737
value = cleanup(get_dicomfield(field, dicomfile))
3838
if not value and value != 0 and field in alternatives.keys():
3939
value = cleanup(get_dicomfield(alternatives[field], dicomfile))
40-
if field == 'SeriesNumber':
40+
if value and field == 'SeriesNumber':
4141
value = int(value.replace('.','')) # Convert the SeriesInstanceUID to an int
4242
if not value and value != 0 and not force:
4343
LOGGER.error(f"Missing '{field}' DICOM field specified in the '{scheme}' folder/naming scheme, cannot find a safe name for: {dicomfile}\n")
@@ -139,7 +139,7 @@ def sortsessions(sourcefolder: Path, subprefix: Union[str,None]='', sesprefix: s
139139
Wrapper around sortsession() to loop over subjects and sessions and map the session DICOM files
140140
141141
:param sourcefolder: The root folder containing the source [sub/][ses/]dicomfiles or the DICOMDIR file
142-
:param subprefix: The prefix for searching the sub folders in session. Use '' to sort DICOMDIR files directly in sourcefolder (None will add DICOMDIR-based sub-/ses-folders
142+
:param subprefix: The prefix for searching the sub folders in session. Use '' to sort DICOMDIR files directly in sourcefolder (None will add DICOMDIR-based sub-/ses-folders)
143143
:param sesprefix: The prefix for searching the ses folders in sub folder
144144
:param folderscheme: Optional naming scheme for the sorted (e.g. Series) subfolders. Follows the Python string formatting syntax with DICOM field names in curly bracers with an optional number of digits for numeric fields', default='{SeriesNumber:03d}-{SeriesDescription}'
145145
:param namescheme: Optional naming scheme for renaming the files. Follows the Python string formatting syntax with DICOM field names in curly bracers, e.g. {PatientName}_{SeriesNumber:03d}_{SeriesDescription}_{AcquisitionNumber:05d}_{InstanceNumber:05d}.IMA
@@ -192,7 +192,7 @@ def sortsessions(sourcefolder: Path, subprefix: Union[str,None]='', sesprefix: s
192192

193193
# Sort the DICOM files in the sourcefolder
194194
else:
195-
dicomfiles = [dcmfile for dcmfile in sourcefolder.glob('**/*' if recursive else '*') if dcmfile.is_file() and re.match(pattern, str(dcmfile))]
195+
dicomfiles = [dcmfile for dcmfile in sourcefolder.glob('**/*' if recursive else '*') if re.match(pattern, str(dcmfile)) and is_dicomfile(dcmfile)]
196196
if dicomfiles:
197197
sortsession(sourcefolder, dicomfiles, folderscheme, namescheme, force, dryrun)
198198
sessions.add(sourcefolder)

0 commit comments

Comments
 (0)