Skip to content

Commit 489d67c

Browse files
Merge pull request #76 from neuro-ml/luna25
LUNA25
2 parents fa2c144 + 438ba49 commit 489d67c

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed

amid/luna25.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
from datetime import datetime
2+
from functools import cached_property
3+
from typing import Dict, NamedTuple, Sequence
4+
5+
import numpy as np
6+
import pandas as pd
7+
import SimpleITK as sitk
8+
from dpipe.im.box import limit_box
9+
from dpipe.itertools import collect
10+
11+
from .internals import Dataset, field, licenses, register
12+
13+
14+
class NoduleBlock(NamedTuple):
    """A pre-cropped nodule patch paired with the metadata stored next to it."""

    # voxel data of the patch
    image: np.ndarray
    # free-form metadata mapping that accompanies the patch
    metadata: Dict
17+
18+
19+
class LUNA25Nodule(NamedTuple):
    """A single annotated nodule of a LUNA25 scan.

    The array-valued fields are annotated as ``np.ndarray`` because that is what
    ``LUNA25.nodules`` actually stores in them (``center_voxel`` and ``bbox`` are
    rounded and cast to ``int``).
    """

    # (CoordX, CoordY, CoordZ) exactly as listed in the annotation table
    coords: np.ndarray
    lesion_id: int
    annotation_id: str
    nodule_id: str
    # value of the ``label`` column of the annotation table
    malignancy: bool
    # nodule center as integer voxel indices, in the axis order of the image array
    center_voxel: np.ndarray
    # integer box ``[start, stop]`` of the nodule block, clipped to the image shape
    bbox: np.ndarray
27+
28+
29+
@register(
    body_region='Chest',
    license=licenses.CC_BY_40,
    link='https://luna25.grand-challenge.org/',
    modality='CT',
    prep_data_size=None,
    raw_data_size=None,
    task='Lung nodule malignancy risk estimation',
)
class LUNA25(Dataset):
    """
    The LUNA25 Challenge dataset is a comprehensive collection designed to support
    the development and validation of AI algorithms for lung nodule malignancy risk
    estimation using low-dose chest CT scans. In total, it contains 2120 patients
    and 4069 low-dose chest CT scans, with 555 annotated malignant nodules and
    5608 benign nodules (3762 unique nodules, 348 of them are malignant).
    The dataset was acquired in participants who enrolled in the
    National Lung Cancer Screening Trial (NLST) between 2002 and 2004 in
    one of the 33 centers in the United States.

    Parameters
    ----------
    root : str, Path, optional
        path to the folder containing the raw downloaded archives.
        If not provided, the cache is assumed to be already populated.

    """

    @property
    def ids(self):
        """Sorted scan identifiers: the names of the ``.mha`` files in ``luna25_images`` without the extension."""
        extension = '.mha'
        # `iterdir` yields entries in arbitrary order, so sort for reproducibility
        return sorted(
            file.name[: -len(extension)]
            for file in (self.root / 'luna25_images').iterdir()
            # skip stray entries: slicing four characters off an unrelated name would produce a bogus id
            if file.name.endswith(extension)
        )

    def _image(self, i):
        """Read scan ``i`` from ``luna25_images/{i}.mha`` as a SimpleITK image."""
        return sitk.ReadImage(self.root / f'luna25_images/{i}.mha')

    @field
    def image(self, i):
        """The voxel array of scan ``i``."""
        return sitk.GetArrayFromImage(self._image(i))

    @field
    def spacing(self, i):
        """Voxel spacing of scan ``i``, reversed so that it follows the axis order of ``image``."""
        return self._image(i).GetSpacing()[::-1]

    def _image_origin(self, i):
        """Physical origin of scan ``i``, reversed so that it follows the axis order of ``image``."""
        return self._image(i).GetOrigin()[::-1]

    def _direction(self, i):
        """Flattened direction matrix of scan ``i``, reversed (i.e. with both its rows and columns flipped)."""
        return self._image(i).GetDirection()[::-1]

    @cached_property
    def _data(self):
        """The annotation table ``LUNA25_Public_Training_Development_Data.csv``, read once per instance."""
        return pd.read_csv(self.root / 'LUNA25_Public_Training_Development_Data.csv')

    def _data_rows(self, i):
        """Rows of the annotation table whose ``SeriesInstanceUID`` equals ``i``."""
        return self._data[self._data['SeriesInstanceUID'] == i]

    def _data_column_value(self, i, column_name):
        """
        Return the single value that ``column_name`` takes over all the rows of scan ``i``.

        Raises
        ------
        KeyError
            if the table has no such column.
        AssertionError
            if the rows hold zero or several distinct values, or the value is null.
        """
        # plain indexing instead of `.get`: a missing column fails with a clear KeyError
        # rather than an AttributeError on `None`
        values = self._data_rows(i)[column_name].unique()
        assert len(values) == 1, f'expected exactly one {column_name!r} value for {i!r}, got {len(values)}'
        value = values[0]
        assert not pd.isnull(value), f'{column_name!r} is null for {i!r}'
        return value

    @field
    def patient_id(self, i):
        """The ``PatientID`` of scan ``i`` as a string."""
        return str(self._data_column_value(i, 'PatientID'))

    @field
    def study_date(self, i):
        """The ``StudyDate`` of scan ``i``, parsed from ``YYYYMMDD`` into a ``datetime.date``."""
        study_date = str(self._data_column_value(i, 'StudyDate'))
        return datetime.strptime(study_date, "%Y%m%d").date()

    @field
    def age(self, i):
        """The ``Age_at_StudyDate`` value of scan ``i``."""
        return self._data_column_value(i, 'Age_at_StudyDate')

    @field
    def sex(self, i):
        """The ``Gender`` value of scan ``i``."""
        return self._data_column_value(i, 'Gender')

    @field
    @collect
    def nodules(self, i):
        """
        All annotated nodules of scan ``i`` as a list of ``LUNA25Nodule``.

        ``center_voxel`` and ``bbox`` are obtained by mapping physical coordinates to voxel
        indices with ``(point - origin) / spacing * direction`` and rounding to integers;
        ``bbox`` is additionally clipped to the image shape.
        """
        rows = self._data_rows(i)
        if rows.empty:
            # nothing to yield, so do not touch the image files at all
            return

        # everything below is the same for every nodule of the scan, and each call reads
        # the scan from disk - so do it once here instead of once per annotation row
        spacing = np.asarray(self.spacing(i))
        image_origin = np.asarray(self._image_origin(i))
        # every 4th entry of a flattened 3x3 matrix is its diagonal
        # NOTE(review): this ignores off-diagonal terms, i.e. presumably assumes axis-aligned scans - confirm
        direction = np.array(self._direction(i)[::4])
        image_shape = self.image(i).shape
        # NOTE(review): hard-coded extent of a nodule block in voxels - presumably matches
        # the arrays in `luna25_nodule_blocks/image`; confirm
        block_shape = np.array([64, 128, 128])

        for row in rows.itertuples():
            coords = np.array([row.CoordX, row.CoordY, row.CoordZ])
            nodule_block_metadata = self.nodule_block_metadata(row.AnnotationID)
            assert np.all(nodule_block_metadata['spacing'] == spacing), (
                f'spacing of nodule block {row.AnnotationID!r} differs from the spacing of {i!r}'
            )
            # `coords` are (x, y, z); reverse them to follow the axis order of `image`
            center_voxel = ((coords[::-1] - image_origin) / spacing) * direction
            bbox_start_point = ((nodule_block_metadata['origin'] - image_origin) / spacing) * direction
            bbox = limit_box([bbox_start_point, bbox_start_point + block_shape], image_shape)
            yield LUNA25Nodule(
                coords=coords,
                lesion_id=row.LesionID,
                annotation_id=str(row.AnnotationID),
                nodule_id=str(row.NoduleID),
                # the field is declared as `bool`, while the table stores the label as a number
                malignancy=bool(row.label),
                center_voxel=np.round(center_voxel).astype(int),
                bbox=np.round(bbox).astype(int),
            )

    def nodule_block_image(self, annotation_id):
        """Load ``luna25_nodule_blocks/image/{annotation_id}.npy``."""
        return np.load(self.root / f'luna25_nodule_blocks/image/{annotation_id}.npy')

    def nodule_block_metadata(self, annotation_id):
        """
        Load the object stored in ``luna25_nodule_blocks/metadata/{annotation_id}.npy``.

        ``nodules`` reads the ``'spacing'`` and ``'origin'`` entries of the result.
        """
        # NOTE(security): `allow_pickle=True` unpickles the file, which can execute arbitrary code -
        # only use this with metadata files obtained from a trusted source
        metadata = np.load(self.root / f'luna25_nodule_blocks/metadata/{annotation_id}.npy', allow_pickle=True)
        # the file is expected to hold a single pickled object wrapped into a 0-d array
        assert metadata.shape == (), f'unexpected metadata shape {metadata.shape} for {annotation_id!r}'
        return metadata.item()

0 commit comments

Comments
 (0)