# FlashNet_Camera.py
"""
By.Wheat
2020.09.02
"""
from dataset import *
from FlashNet.facedet.models.flashnet import FlashNet
from FlashNet.facedet.utils.anchor.prior_box import PriorBox
from FlashNet.facedet.utils.bbox.box_utils import decode
from mmcv import Config
# When a CUDA device is present, make newly created float tensors default to
# the GPU, so tensors built with torch.Tensor(...) further down need no
# explicit device transfer.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
def detect(out, priors, variance, top_k=5000, conf_thresh=0.4, nms_thresh=0.3):
    """Convert raw network output into per-image detections.

    Args:
        out: pair ``(loc_data, conf_data)`` where
            ``loc_data`` has shape [batch_num, priors_num, 4] and
            ``conf_data`` has shape [batch_num, priors_num, classes_num].
        priors: prior boxes, shape [priors_num, 4]; passed to ``decode``.
        variance: variance setting passed to ``decode``.
        top_k: upper bound handed to ``nms``; ``None`` or a non-positive
            value means "use the number of priors".
        conf_thresh: a prior is kept for a class only if its score is
            strictly greater than this value.
        nms_thresh: overlap threshold handed to ``nms``.

    Returns:
        A list with exactly one tensor per image in the batch. Each row is
        ``[score, b0, b1, b2, b3]`` (the four values produced by ``decode``);
        when ``classes_num > 2`` the class index is prepended to every row.
        An image without any detection yields an empty tensor.
    """
    loc_data, conf_data = out
    batch_num, priors_num, classes_num = conf_data.shape[:3]
    if top_k is None or top_k <= 0:
        top_k = priors_num

    output = []
    for i in range(batch_num):
        # Decode predictions into bboxes.
        decoded_boxes = decode(loc_data[i], priors, variance)
        conf_of_image = conf_data[i]  # [priors_num, classes_num]
        output_each = torch.Tensor()
        # Class 0 is skipped (presumably the background class).
        for cl in range(1, classes_num):
            conf_of_cl = conf_of_image[:, cl]
            c_mask = conf_of_cl.gt(conf_thresh)
            scores = conf_of_cl[c_mask]
            if scores.numel() == 0:
                # Nothing above the threshold for this class. Do NOT append
                # to `output` here: it must hold one entry per image, and an
                # extra placeholder would shift the indices of later images.
                continue
            boxes = decoded_boxes[c_mask]
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, nms_thresh, top_k)
            keep = ids[:count]
            output_cl = torch.cat((scores[keep].unsqueeze(1), boxes[keep]), 1)
            if classes_num > 2:
                # Multi-class output: tag every row with its class index.
                class_col = output_cl.new_full((output_cl.size(0), 1), cl)
                output_cl = torch.cat((class_col, output_cl), 1)
            output_each = torch.cat((output_each, output_cl), 0)
        output.append(output_each)
    return output
# Model and preprocessing setup; these module-level statements run once when
# the script starts.
# Detector configuration, indexed below by 'train_cfg' and 'net_cfg' (and by
# 'anchor_cfg' in the capture loop).
cfg = Config.fromfile('./FlashNet/facedet/configs/flashnet_1024_2_anchor.py')
# NOTE(review): rgb_means and img_dim are not referenced again in the visible
# part of this file; the same mean values are passed to BaseTransform below
# as floats.
rgb_means = (104, 117, 123)
img_dim = cfg['train_cfg']['input_size']
# Build the network in its 'test' phase.
net = FlashNet(phase='test', cfg=cfg['net_cfg'])
# Presumably torch.nn.Module.eval() (inference behaviour for dropout and
# batch-norm) - TODO confirm that FlashNet subclasses nn.Module.
net.eval()
# Frame preprocessor. The capture loop unpacks its result as a 3-tuple and
# feeds the first item to the network. NOTE(review): the meaning of (0, 600)
# is not visible here - check BaseTransform.
pre_solve = BaseTransform((0, 600), (104.0, 117.0, 123.0))
# Load the weights file 'FlashNet_WIDER.pth' from WEIGHT_ROOT into the network.
load_weights(net, path.join(WEIGHT_ROOT, 'FlashNet_' + 'WIDER' + '.pth'))
# ---------------------------------------------------------------------------
# Live demo: grab frames, run the detector, draw the results and show them.
#   ESC -> quit
#   "s" -> save the current annotated frame to image2.jpg, then quit
# ---------------------------------------------------------------------------
# True: grab through pypylon; False: use OpenCV capture device 0.
use_pylon = False

if use_pylon:
    # Imported here so that pypylon is only required when it is actually used.
    from pypylon import pylon
    camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())
    camera.Open()
    camera.StartGrabbing(pylon.GrabStrategy_OneByOne)
else:
    camera = cv2.VideoCapture(0)
    camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    camera.set(cv2.CAP_PROP_FRAME_WIDTH, 640)


def _draw_detections(image, detection, color=(0, 255, 0)):
    """Draw boxes and score labels for every detection row onto image in place.

    The last four columns of each row are read as box corners (x1, y1, x2, y2)
    and multiplied by the frame width/height; the column just before them is
    the score. Indexing from the end keeps this working for both row layouts
    returned by detect() (with or without a leading class-index column).
    """
    if detection.numel() == 0:
        # detect() returns a 1-D empty tensor when nothing was found; it
        # cannot be indexed as a 2-D table.
        return
    height, width = image.shape[:2]
    # new_tensor keeps the scale on the same device/dtype as the detections.
    scale = detection.new_tensor([width, height, width, height])
    # One transfer for all rows instead of one per detection.
    boxes = (detection[:, -4:] * scale).type(torch.int32).tolist()
    scores = detection[:, -5].tolist()
    inverse_color = [255 - channel for channel in color]
    for score, (x1, y1, x2, y2) in zip(scores, boxes):
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        # Small filled patch in the top-left corner as background for the
        # score text. fillPoly needs 32-bit integer points, so the dtype is
        # stated explicitly rather than left to the platform default.
        label_patch = np.array(
            [[[x1, y1], [x1 + 25, y1], [x1 + 25, y1 + 15], [x1, y1 + 15]]],
            dtype=np.int32)
        cv2.fillPoly(image, label_patch, color)
        cv2.putText(image, '%.2f' % score, (x1, y1 + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.3, inverse_color, lineType=cv2.LINE_AA)


t0 = time.time()
count = 0
priors = None
priors_input_size = None
try:
    while True:
        if use_pylon:
            image_handle = camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)
            try:
                # cvtColor returns a new array, so the grab buffer can be
                # handed back right away, even if the conversion fails.
                image = cv2.cvtColor(image_handle.Array, cv2.COLOR_YUV2BGR_Y422)
            finally:
                image_handle.Release()
        else:
            success, image = camera.read()
            if not success:
                # No frame (camera missing, unplugged or stream ended):
                # stop cleanly instead of passing None to the preprocessor.
                print('Failed to read a frame from the camera, stopping.')
                break

        x, _, _ = pre_solve(image)
        # HWC numpy array -> 1 x C x H x W tensor.
        x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)
        if torch.cuda.is_available():
            x = x.cuda()
        with torch.no_grad():
            out = net(x)

        # Build the prior boxes once and reuse them until the network input
        # size changes. NOTE(review): assumes PriorBox output depends only on
        # the config and image size and is not modified by decode() - confirm.
        input_size = tuple(x.shape[-2:])
        if priors is None or input_size != priors_input_size:
            priors = PriorBox(cfg['anchor_cfg'], image_size=x.shape[-2:], phase='test').forward()
            priors_input_size = input_size

        y = detect(out, priors, cfg['anchor_cfg']['variance'])
        detection = y[0]
        _draw_detections(image, detection, color=(0, 255, 0))
        cv2.imshow('image', image)

        # Report the average frame rate every 10 frames.
        count = (count + 1) % 10
        if count == 0:
            print('FPS:%.4f' % (10 / (time.time() - t0)))
            t0 = time.time()

        # Mask to the low byte: some platforms set extra high bits.
        k = cv2.waitKey(1) & 0xFF
        if k == 27:  # ESC
            break
        if k == ord("s"):
            cv2.imwrite("image2.jpg", image)
            break
finally:
    # Always close the window and release the camera, also on errors or Ctrl+C.
    cv2.destroyAllWindows()
    if use_pylon:
        camera.Close()
    else:
        camera.release()