-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmultiprocessing_low_res_patch_extraction.py
More file actions
134 lines (102 loc) · 4.09 KB
/
multiprocessing_low_res_patch_extraction.py
File metadata and controls
134 lines (102 loc) · 4.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 12:33:54 2020
@author: leemakri
"""
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 6 12:25:51 2020
@author: leemakri
"""
import os
import pandas as pd
from scipy.io import loadmat
import re
import numpy as np
import multiprocessing
import imageio
from joblib import Parallel, delayed
import time
import sys
print(sys.path)
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1,'/home/leemakri/.local/lib/python3.5/site-packages/')
print(sys.path)
import mat73
#gpu = 1
#os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
def get_random_IF_wsi_region(IF, mask, mask_name, x, y, patch_size=256,
                             out_dir='/hdd/leemakri/test_367/367_2/test'):
    '''
    Save every patch of a whole-slide image whose mask window is non-empty.

    The candidate patches are the cartesian product of the offsets in ``x``
    and ``y``, visited in ascending order. For each candidate the
    ``patch_size`` x ``patch_size`` window of ``mask`` is summed; when the sum
    is greater than zero the same window of ``IF`` is written to
    ``out_dir`` as ``patch_[<x_start>,<y_start>].png``.

    Parameters
    ----------
    IF : array-like
        Image, sliced as ``IF[x_start:x_stop, y_start:y_stop]``.
    mask : array-like
        Mask, sliced with the same windows as ``IF``.
    mask_name : str
        Identifier of the mask. Kept for interface compatibility; it is not
        part of the output file name.
    x, y : iterable of numbers
        Start offsets along the first and second axis respectively.
    patch_size : int, optional
        Edge length of the window (default 256).
    out_dir : str, optional
        Directory the PNG files are written to. It is not created here.

    Returns
    -------
    None
        The function only has side effects (console output and PNG files).
    '''
    # Sort both axes once up front: the column offsets are identical for
    # every row offset, and a one-shot iterator would otherwise be exhausted
    # after the first row.
    x_starts = sorted(x)
    y_starts = sorted(y)

    for x_start in x_starts:
        row_start = int(x_start)
        row_stop = int(x_start + patch_size)
        for y_start in y_starts:
            col_start = int(y_start)
            col_stop = int(y_start + patch_size)

            mask_sum = np.sum(mask[row_start:row_stop, col_start:col_stop])
            if mask_sum > 0:
                print(x_start, y_start)
                region_IF = IF[row_start:row_stop, col_start:col_stop]
                print(region_IF.shape)
                file_name = "patch_[{0},{1}].png".format(x_start, y_start)
                imageio.imwrite(os.path.join(out_dir, file_name), region_IF)
def find_points(key, points_img):
    '''
    Return the distinct values of one column as a set.

    Parameters
    ----------
    key : str
        Name of the column to read.
    points_img : pandas.DataFrame
        Table holding the column ``key``.

    Returns
    -------
    set
        The unique values found in ``points_img[key]``.
    '''
    # Read the column's values directly instead of looking each one up by
    # index label: a label lookup is slow and hands back a Series (which is
    # unhashable) whenever an index label occurs more than once.
    return set(points_img[key].values)
def find_tissue_mask(mask):
    '''
    Rewrite a mask in place so that entries equal to 255 become 1.

    All other entries (including 0) are left untouched. The array passed in
    is modified and the very same object is returned; no copy is made.

    Parameters
    ----------
    mask : numpy.ndarray
        Mask to convert.

    Returns
    -------
    numpy.ndarray
        ``mask`` itself, after the in-place update.
    '''
    # Zeros already hold their final value, so only the 255 entries need to
    # be rewritten; this avoids a second full pass over the mask.
    mask[mask == 255] = 1
    return mask
def _load_mat(mat_path):
    '''Load one .mat file with mat73, falling back to scipy on TypeError.'''
    try:
        return mat73.loadmat(mat_path)
    except TypeError:
        # A TypeError from mat73 is treated as "not a v7.3 file"
        # (TODO confirm against the installed mat73 version); such files are
        # read with scipy.io.loadmat instead.
        return loadmat(mat_path)


def read_IF_mask(parentDataDir):
    '''
    Load the IF image and its mask from the ``IF`` and ``MASK`` folders.

    The directory tree under ``parentDataDir`` is walked, the file names of
    every directory are sorted, and the directories themselves are ordered
    by path. The IF file name is taken from the last listing and the mask
    file name from the second-to-last one; both are then loaded from
    ``<parentDataDir>/IF`` and ``<parentDataDir>/MASK`` respectively.

    Parameters
    ----------
    parentDataDir : str
        Root directory that contains the ``IF`` and ``MASK`` sub-directories.

    Returns
    -------
    tuple
        ``(data_dict_image, data_dict_mask, mask_name)`` where the first two
        are the loaded .mat contents and ``mask_name`` is the list obtained
        by taking the text after the first ``'k'`` of the mask file name and
        splitting it on ``'.'``.

    Raises
    ------
    IndexError
        If fewer than two directories are found, a selected directory is
        empty, or the mask file name contains no ``'k'``.
    '''
    # One sorted file listing per directory, directories ordered by path.
    files = [sorted(names) for _, _, names in sorted(os.walk(parentDataDir))]
    print(files)

    # NOTE(review): the listings are picked purely by position in the sorted
    # walk - confirm that files[-1] / files[-2] really are the listings the
    # IF and mask file names should come from for the on-disk layout.
    # Change the second index based on the IF image index in the IF dir.
    image_filename = files[-1][0]
    # Change the second index based on the mask index in the MASK dir.
    mask_filename = files[-2][0]

    # Build the paths from the argument so that the directory that is listed
    # and the directory that is loaded from are always the same root.
    print("Reading IF")
    data_dict_image = _load_mat(os.path.join(parentDataDir, 'IF', image_filename))
    print("Reading mask")
    data_dict_mask = _load_mat(os.path.join(parentDataDir, 'MASK', mask_filename))

    # Plain str.split on '.' gives the same pieces as the former regex split
    # without relying on an invalid '\.' escape in a non-raw string.
    mask_name = mask_filename.split('k')[1].split('.')
    return data_dict_image, data_dict_mask, mask_name
if __name__ == '__main__':
    # Script entry point: read the candidate patch offsets, load the image
    # and mask, then extract all tissue patches in parallel.
    parentDataDir = '/hdd/leemakri/data_control'
    patch_size = 256
    num_cores = multiprocessing.cpu_count()

    with open("patch_points1_256_50.csv", 'r') as file:
        df = pd.read_csv(file)
    df.columns = ['x', 'y']

    start = time.time()
    data_dict_image, data_dict_mask, mask_name = read_IF_mask(parentDataDir)
    # The array of interest is taken to be the last entry of each loaded dict.
    image = list(data_dict_image.values())[-1]
    mask = list(data_dict_mask.values())[-1]
    # For .mat files that are not saved in the v7.3 format, select the arrays
    # by key instead of the two lines above:
    # image = data_dict_image['registrated1']
    # mask = data_dict_mask['registrated1']
    print(image.shape[0])
    print(image.shape[1])

    # Keep only offsets that leave room for a full patch inside the image.
    points_img = df[df.x <= image.shape[0] - patch_size].x.to_frame()
    points_img_y = df[df.y <= image.shape[1] - patch_size].y.to_frame()
    x = find_points('x', points_img)
    y = find_points('y', points_img_y)
    mask_IF = find_tissue_mask(mask)

    # Parallel expects an iterable of delayed calls. Passing a single
    # delayed(...) result makes joblib iterate over that one
    # (func, args, kwargs) tuple instead of running it, so fan out one task
    # per x offset; each task handles every y offset for its row.
    Parallel(n_jobs=num_cores)(
        delayed(get_random_IF_wsi_region)(
            IF=image, mask=mask_IF, mask_name=mask_name[0],
            x=[x_start], y=y, patch_size=patch_size)
        for x_start in sorted(x))

    print('Time for WSI chopping: ' + str(time.time()-start))
    print("Patches Extracted!"+ mask_name[0])