-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_features_and_labels.py
More file actions
128 lines (108 loc) · 4.72 KB
/
extract_features_and_labels.py
File metadata and controls
128 lines (108 loc) · 4.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
Extract features and labels from worms in BBBC010
"""
import argparse
import numpy as np
import pandas as pd
from skimage import io, measure
from skimage.feature import graycomatrix, graycoprops
# Output schemas, fixed up front so both CSV files get a header row (and the
# class-distribution summary works) even when the mask contains no objects.
_LABEL_COLUMNS = ['well_id', 'object_id', 'label', 'gfp_intensity']
_FEATURE_COLUMNS = [
    'well_id', 'object_id',
    # Shape features
    'area', 'perimeter', 'eccentricity', 'solidity', 'extent',
    'major_axis_length', 'minor_axis_length',
    # Intensity features from brightfield
    'mean_intensity', 'max_intensity', 'min_intensity', 'intensity_std',
    # Texture features
    'texture_contrast', 'texture_dissimilarity', 'texture_homogeneity',
    'texture_energy', 'texture_correlation', 'texture_asm',
]
# (graycoprops property name, output column name)
_TEXTURE_PROPERTIES = [
    ('contrast', 'texture_contrast'),
    ('dissimilarity', 'texture_dissimilarity'),
    ('homogeneity', 'texture_homogeneity'),
    ('energy', 'texture_energy'),
    ('correlation', 'texture_correlation'),
    ('ASM', 'texture_asm'),
]


def _texture_features(bf_crop, object_mask):
    """Return GLCM texture statistics for one object, ignoring background.

    Args:
        bf_crop: brightfield pixels of the object's bounding box
        object_mask: boolean array, same shape as ``bf_crop``, True on the object

    Returns:
        dict mapping each ``texture_*`` column name to the property value
        averaged over the four GLCM angles (distance 1).
    """
    values = bf_crop[object_mask].astype(np.float64)
    low, high = values.min(), values.max()

    # Gray level 0 is reserved for "not part of this object"; the object's own
    # intensities are stretched over levels 1..255 so that its darkest pixels
    # are never confused with background.
    quantized = np.zeros(bf_crop.shape, dtype=np.uint8)
    if high > low:
        scaled = (values - low) / (high - low) * 254
        quantized[object_mask] = scaled.astype(np.uint8) + 1
    else:
        # Flat object: every pixel lands on the same (non-background) level.
        quantized[object_mask] = 1

    glcm = graycomatrix(quantized,
                        distances=[1],
                        angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
                        levels=256,
                        symmetric=True,
                        normed=False).astype(np.float64)

    # Discard every pixel pair that involves background. Otherwise the
    # normalised matrix is dominated by background/background pairs and the
    # "texture" features mostly encode how much of the frame the object fills.
    glcm[0, :, :, :] = 0
    glcm[:, 0, :, :] = 0

    # Re-normalise each (distance, angle) slice over object/object pairs only.
    # A slice with no such pairs (e.g. a one-pixel object) is left all-zero.
    totals = glcm.sum(axis=(0, 1), keepdims=True)
    totals[totals == 0] = 1
    glcm /= totals

    return {
        column: graycoprops(glcm, prop)[0].mean()
        for prop, column in _TEXTURE_PROPERTIES
    }


def extract_features_and_labels(well_id, image_path, mask_path, threshold):
    """
    Extract features and a live/dead label for each object in the image.

    Writes ``{well_id}_features.csv`` and ``{well_id}_labels.csv`` to the
    current working directory and prints a short summary.

    Args:
        well_id: well identifier (also used as the output file prefix)
        image_path: path to multichannel image (first channel is GFP, second
            channel is brightfield); indexed channel-first as ``img[c, :, :]``
        mask_path: path to instance segmentation mask
        threshold: intensity threshold for live/dead classification; an object
            is labelled 'dead' when its median GFP intensity exceeds
            ``(1 + threshold)`` times the median GFP intensity of the whole
            image, otherwise 'live'

    Raises:
        ValueError: if the image planes and the mask differ in shape
    """
    # Read image and mask
    img = io.imread(image_path)
    mask = io.imread(mask_path)
    print(f'Processing {well_id} ({img.shape}) and corresponding mask ({mask.shape})')

    # Extract channels
    gfp_channel = img[0, :, :]
    bf_channel = img[1, :, :]
    if bf_channel.shape != mask.shape:
        raise ValueError(
            f'Image planes have shape {bf_channel.shape} but mask has shape '
            f'{mask.shape}; expected a channel-first image matching the mask'
        )

    # Get region properties
    regions = measure.regionprops(mask, intensity_image=bf_channel)

    # The background level is a property of the whole image, so the live/dead
    # cutoff is computed once rather than once per object.
    background_intensity = np.median(gfp_channel)
    dead_cutoff = (1.0+threshold) * background_intensity

    # Extract features and labels
    features_list = []
    labels_list = []
    for region in regions:
        object_id = region.label

        # Work on the bounding-box crop only: region.image is the object's
        # boolean mask within region.slice, selecting exactly the pixels that
        # ``mask == object_id`` would, without scanning the whole frame.
        window = region.slice
        object_mask = region.image
        bf_crop = bf_channel[window]

        # Median GFP intensity of the object decides live/dead
        object_intensity = np.median(gfp_channel[window][object_mask])
        label = 'dead' if object_intensity > dead_cutoff else 'live'

        # Store label
        labels_list.append({
            'well_id': well_id,
            'object_id': object_id,
            'label': label,
            'gfp_intensity': object_intensity  # Keep this for quality control and debugging
        })

        # Store regionprops features
        features = {
            'well_id': well_id,
            'object_id': object_id,
            # Shape features
            'area': region.area,
            'perimeter': region.perimeter,
            'eccentricity': region.eccentricity,
            'solidity': region.solidity,
            'extent': region.extent,
            'major_axis_length': region.major_axis_length,
            'minor_axis_length': region.minor_axis_length,
            # Intensity features from brightfield
            'mean_intensity': region.mean_intensity,
            'max_intensity': region.max_intensity,
            'min_intensity': region.min_intensity,
            'intensity_std': np.std(bf_crop[object_mask]),
        }
        # Texture features (GLCM restricted to the object's own pixels)
        features.update(_texture_features(bf_crop, object_mask))
        features_list.append(features)

    # Save features and labels to separate files
    features_df = pd.DataFrame(features_list, columns=_FEATURE_COLUMNS)
    labels_df = pd.DataFrame(labels_list, columns=_LABEL_COLUMNS)
    features_df.to_csv(f'{well_id}_features.csv', index=False)
    labels_df.to_csv(f'{well_id}_labels.csv', index=False)
    print(f'Saved features and labels for {len(features_df)} objects')
    print(f"Class distribution: {labels_df['label'].value_counts().to_dict()}")
def main():
    """Parse the command-line options and run the extraction for one well.

    All four options are mandatory: ``--well_id``, ``--image``, ``--mask``
    (plain strings) and ``--threshold`` (parsed as a float).
    """
    cli = argparse.ArgumentParser()
    for flag in ('--well_id', '--image', '--mask'):
        cli.add_argument(flag, required=True)
    cli.add_argument('--threshold', type=float, required=True)
    opts = cli.parse_args()

    extract_features_and_labels(
        well_id=opts.well_id,
        image_path=opts.image,
        mask_path=opts.mask,
        threshold=opts.threshold,
    )
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()