-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathcompute_pathology_features.py
More file actions
121 lines (102 loc) · 4.75 KB
/
compute_pathology_features.py
File metadata and controls
121 lines (102 loc) · 4.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Compute four pathological features: (1) number of instances (for example, bilateral Pleural Effusion would have two instances, whereas there is only one instance for Cardiomegaly), (2) size (pathology area with respect to the area of the whole CXR), (3) elongation and (4) irrectangularity (the last two features measure the complexity of the pathology shape and were calculated by fitting a rectangle of minimum area enclosing the binary mask).
Note that we use the ground-truth annotations to extract the number of instances, and we use the ground-truth segmentation masks to calculate area, elongation and rectangularity. We chose to extract number of instances from annotations because sometimes radiologists draw two instances for a pathology that are overlapping; in this case, the number of annotations would be 2, but the number of segmentations would be 1.
"""
from argparse import ArgumentParser
import cv2
import glob
import json
import numpy as np
import pandas as pd
import pickle
from pycocotools import mask
from eval_constants import LOCALIZATION_TASKS
def get_geometric_features(segm):
"""
Given a segmentation mask, return geometric features.
Args:
segm (np.array): the binary segmentation mask
"""
# load segmentation
rgb_img = cv2.cvtColor(255 * segm, cv2.COLOR_GRAY2RGB)
# find contours
contours, _ = cv2.findContours(segm.copy(), 1, 1)
# get number of instances and area
n_instance = len(contours)
area_ratio = np.sum(segm) / (segm.shape[0] * segm.shape[1])
# use the longest coutour to calculate geometric features
max_idx = np.argmax([len(contour) for contour in contours])
cnt = contours[max_idx]
rect = cv2.minAreaRect(cnt)
(x, y), (w, h), a = rect
instance_area = cv2.contourArea(cnt)
elongation = max(w, h) / min(w, h)
rec_area_ratio = instance_area / (w * h)
return n_instance, area_ratio, elongation, rec_area_ratio
def main(args):
# load ground-truth annotations (needed to extract number of instances)
# and ground-truth segmentations
with open(args.gt_ann) as f:
gt_ann = json.load(f)
with open(args.gt_seg) as f:
gt_seg = json.load(f)
# extract features from all cxrs with at least one pathology
all_instances = {}
all_areas = {}
all_elongations = {}
all_rec_area_ratios = {}
all_ids = sorted(gt_ann.keys())
pos_ids = sorted(gt_seg.keys())
for task in sorted(LOCALIZATION_TASKS):
print(task)
n_instances = []
areas = []
elongations = []
rec_area_ratios = []
for img_id in all_ids:
n_instance = 0
area = 0
elongation = np.nan
rec_area_ratio = np.nan
# calculate features for cxr with a pathology segmentation
if img_id in pos_ids:
gt_item = gt_seg[img_id][task]
gt_mask = mask.decode(gt_item)
if np.sum(gt_mask) > 0:
# use annotation to get number of instances
n_instance = len(gt_ann[img_id][task]) \
if task in gt_ann[img_id] else 0
# use segmentation to get other features
n_instance_segm, area, elongation, rec_area_ratio = \
get_geometric_features(gt_mask)
n_instances.append(n_instance)
areas.append(area)
elongations.append(elongation)
rec_area_ratios.append(rec_area_ratio)
all_instances[task] = n_instances
all_areas[task] = areas
all_elongations[task] = elongations
all_rec_area_ratios[task] = rec_area_ratios
instance_df = pd.DataFrame(all_instances)
area_df = pd.DataFrame(all_areas)
elongation_df = pd.DataFrame(all_elongations)
rec_area_ratio_df = pd.DataFrame(all_rec_area_ratios)
instance_df['img_id'] = all_ids
area_df['img_id'] = all_ids
elongation_df['img_id'] = all_ids
rec_area_ratio_df['img_id'] = all_ids
instance_df.to_csv(f'{args.save_dir}/num_instances.csv', index=False)
area_df.to_csv(f'{args.save_dir}/area_ratio.csv', index=False)
elongation_df.to_csv(f'{args.save_dir}/elongation.csv', index=False)
rec_area_ratio_df.to_csv(f'{args.save_dir}/rec_area_ratio.csv', index=False)
if __name__ == '__main__':
parser = ArgumentParser()
parser.add_argument('--gt_ann', type=str,
help='path to json file with raw ground-truth annotations')
parser.add_argument('--gt_seg', type=str,
help='path to json file with ground-truth segmentations \
(encoded)')
parser.add_argument('--save_dir', default='.',
help='where to save feature dataframes')
args = parser.parse_args()
main(args)