detect.py
# import required libraries
import cv2
from os import unlink
import dlib
import numpy as np
import argparse
from loguru import logger
from model.utilities.utils import remove_files
from model.utilities.image import save_image, draw_rectangles, normalize_histogram
from model.utilities.data import Data, Image
from model.utilities.defaults import MIN_MAX_TEMPLATE
from model.utilities.config import config


class FaceDetector:
    """
    Use dlib's landmark estimation to align faces.

    The alignment preprocesses faces for input into a neural network.
    Faces are resized to the same size (such as 96x96) and transformed
    to make landmarks (such as the eyes and nose) appear at the same
    location on every image.

    Normalized landmarks:

    .. image:: ../images/dlib-landmark-mean.png
    """

    #: Landmark indices into dlib's 68-point facial landmark model.
    INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]
    OUTER_EYES_AND_NOSE = [36, 45, 33]

    def __init__(self, face_predictor):
        """
        Instantiate a 'FaceDetector' object.

        :param face_predictor: The path to dlib's facial landmark predictor model file.
        :type face_predictor: str
        """
        assert face_predictor is not None
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(face_predictor)

    def get_all_faces(self, image):
        """
        Find all face bounding boxes in an image.

        :param image: RGB image to process. Shape: (height, width, 3)
        :type image: numpy.ndarray
        :return: All face bounding boxes in an image.
        :rtype: dlib.rectangles
        """
        assert image is not None
        image = normalize_histogram(image)
        try:
            return self.detector(image, 1)
        except Exception as e:
            logger.warning("Warning: {}".format(e))
            # In rare cases, exceptions are thrown.
            return []
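
    # A minimal sketch of how the detections might be consumed (names here are
    # illustrative, not part of this module). dlib rectangles expose pixel
    # coordinates via left()/top()/right()/bottom():
    #
    #   detector = FaceDetector(config.DETECTOR)
    #   for rect in detector.get_all_faces(rgb_image):
    #       x1, y1, x2, y2 = rect.left(), rect.top(), rect.right(), rect.bottom()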

    def get_face(self, image, skip_multi=False):
        """
        Find the largest face bounding box in an image.

        :param image: RGB image to process. Shape: (height, width, 3)
        :type image: numpy.ndarray
        :param skip_multi: Skip image if more than one face detected.
        :type skip_multi: bool
        :return: The largest face bounding box in an image, or None.
        :rtype: dlib.rectangle
        """
        assert image is not None
        faces = self.get_all_faces(image)
        # Return the largest detection; with skip_multi, only accept exactly one face.
        if (not skip_multi and len(faces) > 0) or len(faces) == 1:
            return max(faces, key=lambda rect: rect.width() * rect.height())
        else:
            return None

    def find_landmarks(self, image, bounding_box):
        """
        Find the landmarks of a face.

        :param image: RGB image to process. Shape: (height, width, 3)
        :type image: numpy.ndarray
        :param bounding_box: Bounding box around the face to find landmarks for.
        :type bounding_box: dlib.rectangle
        :return: Detected landmark locations.
        :rtype: list of (x, y) tuples
        """
        assert image is not None
        assert bounding_box is not None
        points = self.predictor(image, bounding_box)
        # convert the points object to a list of (x, y) coordinates
        coordinates = list(map(lambda p: (p.x, p.y), points.parts()))
        return coordinates

    def align_face(self, image_dimensions, image, bounding_box=None,
                   landmarks=None, landmark_indices=INNER_EYES_AND_BOTTOM_LIP,
                   skip_multi=False):
        """
        Transform and align a face in an image.

        :param image_dimensions: The edge length in pixels of the square the image is resized to.
        :type image_dimensions: int
        :param image: RGB image to process. Shape: (height, width, 3)
        :type image: numpy.ndarray
        :param bounding_box: Bounding box around the face to align. \
            Defaults to the largest face.
        :type bounding_box: dlib.rectangle
        :param landmarks: Detected landmark locations. \
            Found on `bounding_box` if not provided.
        :type landmarks: list of (x, y) tuples
        :param landmark_indices: The indices to transform to.
        :type landmark_indices: list of ints
        :param skip_multi: Skip image if more than one face detected.
        :type skip_multi: bool
        :return: The aligned RGB image. Shape: (image_dimensions, image_dimensions, 3)
        :rtype: numpy.ndarray
        """
        assert image_dimensions is not None
        assert image is not None
        assert landmark_indices is not None
        if bounding_box is None:
            bounding_box = self.get_face(image, skip_multi)
            if bounding_box is None:
                return
        if landmarks is None:
            landmarks = self.find_landmarks(image, bounding_box)
        np_landmarks = np.float32(landmarks)
        np_landmark_indices = np.array(landmark_indices)
        # map the selected landmarks onto the scaled template positions
        H = cv2.getAffineTransform(np_landmarks[np_landmark_indices],
                                   image_dimensions * MIN_MAX_TEMPLATE[np_landmark_indices])
        thumbnail = cv2.warpAffine(image, H, (image_dimensions, image_dimensions))
        return thumbnail

    def align_all_faces(self, image, bounding_boxes=None):
        if bounding_boxes is None:
            bounding_boxes = self.get_all_faces(image)
        faces = []
        for box in bounding_boxes:
            faces.append(self.align_face(image_dimensions=96, image=image, bounding_box=box, landmarks=None,
                                         landmark_indices=self.OUTER_EYES_AND_NOSE))
        return faces
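
# A minimal end-to-end sketch of the class above (the paths and file names are
# illustrative; the predictor should be dlib's 68-point landmark model):
#
#   detector = FaceDetector("models/shape_predictor_68_face_landmarks.dat")
#   rgb = cv2.cvtColor(cv2.imread("test-data/test-image-4.jpg"), cv2.COLOR_BGR2RGB)
#   aligned_96x96_faces = detector.align_all_faces(rgb)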


def main(arguments):
    """
    The module starts its execution from this method (main)
    when it is executed directly.

    :return:
    """
    remove_files(arguments.output)
    data = Data.load(arguments.input, loaders=[Image]).get(Image.__name__)
    logger.info('input data: {}'.format(list(map(lambda obj: str(obj), data))))
    logger.info('initializing face detector and face landmarks predictor')
    detector = FaceDetector(config.DETECTOR)
    for file in data:
        image = file()
        faces = detector.get_all_faces(image=image)
        draw_rectangles(image=image, face_locations=faces)
        if config.SAVE:
            save_image(name=str(file), image=image, path=arguments.output)
        if config.REMOVE:
            unlink(file.path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='detects faces in the images')
    parser.add_argument('-i', '--input', help='path to the input folder or image',
                        metavar='', type=str, default='test-data/test-image-4.jpg')
    parser.add_argument('-o', '--output', help='path to the output folder',
                        metavar='', type=str, default='output/detector')
    args = parser.parse_args()
    main(args)

# Future Work: Implement RetinaFace or MTCNN for detection
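#
# One possible direction, as a hedged sketch (it assumes the third-party
# facenet-pytorch package, which is not a dependency of this module):
#
#   from facenet_pytorch import MTCNN
#   mtcnn = MTCNN(keep_all=True)
#   boxes, probs = mtcnn.detect(rgb_image)  # boxes: (N, 4) array of [x1, y1, x2, y2]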