Skip to content

Commit 12df5a1

Browse files
committed
Adds multi-threaded wado downloads
1 parent 217bd85 commit 12df5a1

9 files changed

Lines changed: 289 additions & 67 deletions

File tree

HISTORY.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# History
22

3+
## v0.4.0 (2021-04-05)
4+
5+
* Added multi-threaded wado downloads
6+
* Slight changes to dicomtrolley methods
7+
8+
39
## v0.3.0 (2021-04-01)
410

511
* Added examples

dicomtrolley/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
"""Retrieve medical images via DICOM-QR and DICOMweb"""
2-
__version__ = "v0.3.0"
2+
__version__ = "v0.4.0"

dicomtrolley/core.py

Lines changed: 73 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,17 @@
1-
"""Combines WADO and MINT to make make getting DICOM studies easy"""
1+
"""Combines WADO and MINT to make getting DICOM studies easy
2+
3+
Notes
4+
-----
5+
Design choices:
6+
7+
WADO and MINT modules should remain un-entangled so are not allowed to use
8+
each other's classes. Core has knowledge of both and can convert classes between the
9+
two if needed
10+
11+
"""
212

3-
from itertools import chain
413
from pathlib import Path
5-
from typing import List, Sequence
14+
from typing import List, Sequence, Union
615

716
from dicomtrolley.mint import (
817
Mint,
@@ -11,17 +20,11 @@
1120
MintStudy,
1221
)
1322
from dicomtrolley.query import Query, QueryLevels
14-
from dicomtrolley.wado import Wado
23+
from dicomtrolley.wado import InstanceReference, Wado
1524

1625

1726
class Trolley:
18-
"""Combines WADO and MINT to make make getting DICOM studies easy
19-
20-
Offers three different types of functions:
21-
* Find - Search for things on server, but do not download yet
22-
* Download - Quick methods that download some data to disk
23-
* Get - Return pydicom datasets directly without writing to disk
24-
"""
27+
"""Combines WADO and MINT to make getting DICOM studies easy"""
2528

2629
def __init__(self, wado: Wado, mint: Mint):
2730
self.wado = wado
@@ -61,7 +64,7 @@ def download_study(self, study_instance_uid, output_dir):
6164
storage.save(dataset)
6265

6366
def fetch_all_datasets(self, mint_objects: List[MintObject]):
64-
"""Get full DICOM dataset for each instance in study. Calls mint and wado
67+
"""Get full DICOM dataset for each instance in study
6568
6669
Parameters
6770
----------
@@ -73,36 +76,66 @@ def fetch_all_datasets(self, mint_objects: List[MintObject]):
7376
Generator[Dataset, None, None]
7477
The downloaded dataset and the context that was used to download it
7578
"""
76-
77-
for instance in self.extract_instances(mint_objects):
78-
yield self.get_dataset(instance)
79+
for ds in self.wado.datasets(self.extract_instances(mint_objects)):
80+
yield ds
7981

8082
@staticmethod
81-
def extract_instances(mint_objects: Sequence[MintObject]):
82-
"""Get all individual instances from input.
83+
def extract_instances(
84+
objects: Sequence[Union[MintObject, InstanceReference]]
85+
):
86+
"""Get all individual instances from input
87+
88+
A pre-processing step for getting datasets
8389
8490
Parameters
8591
----------
86-
mint_objects: Sequence[MintObject]
92+
objects: Sequence[Union[MintObject, InstanceReference]]
8793
Any combination of MintStudy, MintSeries, MintInstance and InstanceReference instances
8894
89-
A pre-processing step for getting datasets
95+
Returns
96+
-------
97+
List[InstanceReference]
98+
A reference to each instance (slice)
99+
90100
"""
91-
return list(chain(*(x.all_instances() for x in mint_objects)))
101+
instances = []
102+
for item in objects:
103+
if isinstance(item, InstanceReference):
104+
instances.append(item)
105+
else:
106+
instances = instances + [
107+
to_reference(x) for x in item.all_instances()
108+
]
109+
return instances
110+
111+
def fetch_all_datasets_async(self, mint_objects, max_workers=None):
112+
"""Get full DICOM dataset for each instance in study using multiple threads
113+
114+
Parameters
115+
----------
116+
mint_objects: List[MintObject]
117+
get dataset for each instance contained in these objects
118+
max_workers: int, optional
119+
Max number of ThreadPoolExecutor workers to use. Defaults to
120+
ThreadPoolExecutor default
92121
93-
def get_dataset(self, instance: MintInstance):
94-
"""Get all DICOM data for this instance from server
122+
Raises
123+
------
124+
DICOMTrolleyException
125+
If getting or parsing of any instance fails
95126
96127
Returns
97128
-------
98-
Dataset
129+
Generator[Dataset, None, None]
130+
The downloaded dataset and the context that was used to download it
131+
99132
"""
100133

101-
return self.wado.get_dataset(
102-
study_instance_uid=instance.parent.parent.uid,
103-
series_instance_uid=instance.parent.uid,
104-
sop_instance_iud=instance.uid,
105-
)
134+
for ds in self.wado.datasets_async(
135+
instances=self.extract_instances(mint_objects),
136+
max_workers=max_workers,
137+
):
138+
yield ds
106139

107140

108141
class DICOMStorageDir:
@@ -133,4 +166,15 @@ def generate_path(self, dataset):
133166
def get_value(dataset, tag_name):
134167
"""Extract value for use in path. If not found return default"""
135168
default = "unknown"
136-
return dataset.get(tag_name, default).replace(".", "_")
169+
return str(dataset.get(tag_name, default)).replace(".", "_")
170+
171+
172+
def to_reference(instance: MintInstance) -> InstanceReference:
173+
"""Simplify a more extensive MINT instance to an InstanceReference
174+
needed for calls to WADO functions
175+
"""
176+
return InstanceReference(
177+
study_instance_uid=instance.parent.parent.uid,
178+
series_instance_uid=instance.parent.uid,
179+
sop_instance_iud=instance.uid,
180+
)

dicomtrolley/wado.py

Lines changed: 119 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,33 @@
22
33
https://www.dicomstandard.org/dicomweb/retrieve-wado-rs-and-wado-uri/
44
"""
5+
from concurrent.futures import as_completed
6+
from concurrent.futures.thread import ThreadPoolExecutor
7+
from typing import Sequence
8+
9+
from pydantic.dataclasses import dataclass
10+
from pydicom.dataset import Dataset
511
from pydicom.errors import InvalidDicomError
612
from pydicom.filebase import DicomBytesIO
713
from pydicom.filereader import dcmread
14+
from requests.models import Response
15+
from requests_futures.sessions import FuturesSession
816

917
from dicomtrolley.exceptions import DICOMTrolleyException
1018

1119

20+
@dataclass()
21+
class InstanceReference:
22+
"""All information needed to download a single slice (SOPInstance) in WADO"""
23+
24+
study_instance_uid: str
25+
series_instance_uid: str
26+
sop_instance_iud: str
27+
28+
def __str__(self):
29+
return f"InstanceReference {self.sop_instance_iud}"
30+
31+
1232
class Wado:
1333
"""A connection to a WADO server"""
1434

@@ -25,35 +45,116 @@ def __init__(self, session, url):
2545
self.session = session
2646
self.url = url
2747

28-
def get_dataset(
29-
self, study_instance_uid, series_instance_uid, sop_instance_iud
30-
):
31-
"""Get all DICOM data the given instance (slice)
48+
@staticmethod
49+
def to_wado_parameters(instance):
50+
"""WADO url parameters for retrieving an instance
3251
3352
Returns
3453
-------
35-
Dataset
36-
A pydicom dataset
54+
Dict[str, str]
55+
All parameters for a standard WADO get request
56+
"""
57+
return {
58+
"requestType": "WADO",
59+
"studyUID": instance.study_instance_uid,
60+
"seriesUID": instance.series_instance_uid,
61+
"objectUID": instance.sop_instance_iud,
62+
"contentType": "application/dicom",
63+
}
64+
65+
@staticmethod
66+
def parse_wado_response(response: Response) -> Dataset:
67+
"""Create a Dataset out of http response from WADO server
3768
3869
Raises
3970
------
4071
DICOMTrolleyException
41-
If getting does not work for some reason
72+
If response is not as expected or if parsing fails
4273
74+
Returns
75+
-------
76+
Dataset
4377
"""
78+
if response.status_code != 200:
4479

45-
response = self.session.get(
46-
self.url,
47-
params={
48-
"requestType": "WADO",
49-
"studyUID": study_instance_uid,
50-
"seriesUID": series_instance_uid,
51-
"objectUID": sop_instance_iud,
52-
"contentType": "application/dicom",
53-
},
54-
)
80+
raise DICOMTrolleyException(
81+
f"Calling {response.url} failed ({response.status_code} - "
82+
f"{response.reason})\n"
83+
f"response content was {str(response.content[:300])}"
84+
)
5585
raw = DicomBytesIO(response.content)
5686
try:
5787
return dcmread(raw)
5888
except InvalidDicomError as e:
59-
raise DICOMTrolleyException(f"Error retrieving instance: {e}")
89+
raise DICOMTrolleyException(
90+
f"Error parsing response as dicom: {e}."
91+
f" Response content (first 300 elements) was"
92+
f" {str(response.content[:300])}"
93+
)
94+
95+
def get_dataset(self, instance: InstanceReference):
96+
"""Get DICOM dataset for the given instance (slice)
97+
98+
Raises
99+
------
100+
DICOMTrolleyException
101+
If getting does not work for some reason
102+
103+
Returns
104+
-------
105+
Dataset
106+
A pydicom dataset
107+
"""
108+
return self.parse_wado_response(
109+
self.session.get(
110+
self.url, params=self.to_wado_parameters(instance)
111+
)
112+
)
113+
114+
def datasets(self, instances: Sequence[InstanceReference]):
115+
"""Retrieve each instance via WADO
116+
117+
Returns
118+
-------
119+
Generator[Dataset, None, None]
120+
"""
121+
for instance in instances:
122+
yield self.get_dataset(instance)
123+
124+
def datasets_async(
125+
self, instances: Sequence[InstanceReference], max_workers=None
126+
):
127+
"""Retrieve each instance via WADO
128+
129+
Parameters
130+
----------
131+
instances: Sequence[InstanceReference]
132+
Retrieve dataset for each of these instances
133+
max_workers: int, optional
134+
Use this number of workers in ThreadPoolExecutor. Defaults to
135+
default for ThreadPoolExecutor
136+
137+
Raises
138+
------
139+
DICOMTrolleyException
140+
When a server response cannot be parsed as DICOM
141+
142+
Returns
143+
-------
144+
Generator[Dataset, None, None]
145+
"""
146+
147+
with FuturesSession(
148+
session=self.session,
149+
executor=ThreadPoolExecutor(max_workers=max_workers),
150+
) as futures_session:
151+
futures = []
152+
for instance in instances:
153+
futures.append(
154+
futures_session.get(
155+
self.url, params=self.to_wado_parameters(instance)
156+
)
157+
)
158+
159+
for future in as_completed(futures):
160+
yield self.parse_wado_response(future.result())

examples/go_shopping.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,16 @@
3232

3333
print(f"Found {len(studies)} studies. Taking one with least instances")
3434
studies.sort(key=lambda x: x.data.NumberOfStudyRelatedInstances)
35-
study = studies[0]
35+
study = studies[1]
3636

3737
print(f"Getting slice info for {study}")
3838
details = trolley.find_studies(
3939
Query(studyInstanceUID=study.uid, queryLevel=QueryLevels.INSTANCE)
4040
)
41-
instances = trolley.extract_instances(details)
42-
print(f"Got {len(instances)} instances for {study}")
4341

4442
storage = DICOMStorageDir("/tmp/trolley")
4543
print(f"Saving datasets to {storage}")
46-
for instance in instances:
47-
print(f"downloading {instance}")
48-
storage.save(trolley.get_dataset(instance))
49-
44+
for ds in trolley.fetch_all_datasets_async(details, max_workers=8):
45+
print(f"downloaded {ds.SOPInstanceUID}")
46+
storage.save(ds)
5047
print("Done")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "dicomtrolley"
3-
version = "v0.3.0"
3+
version = "v0.4.0"
44
description = "Retrieve medical images via DICOM-QR and DICOMweb"
55
authors = ["sjoerdk <sjoerd.kerkstra@radboudumc.nl>"]
66
readme = "README.md"

0 commit comments

Comments
 (0)