Allow using different data types for MTCNN model.

n1mmy · n1mmy · commit c304896f9630 · 2020-04-17T00:00:15.000-07:00
Use this by calling `.half()` or `.double()` on an MTCNN object; input images are then cast to the dtype of the model's parameters.

Using `.half()` substantially reduces GPU memory usage, at the cost of some accuracy.
diff --git a/models/utils/detect_face.py b/models/utils/detect_face.py
@@ -22,7 +22,8 @@ def detect_face(imgs, minsize, pnet, rnet, onet, threshold, factor, device):
 
     imgs = torch.as_tensor(imgs, device=device)
 
-    imgs = imgs.permute(0, 3, 1, 2).float()
+    model_dtype = next(pnet.parameters()).dtype
+    imgs = imgs.permute(0, 3, 1, 2).type(model_dtype)
 
     batch_size = len(imgs)
     h, w = imgs.shape[2:4]
@@ -178,7 +179,7 @@ def generateBoundingBox(reg, probs, scale, thresh):
     image_inds = mask_inds[:, 0]
     score = probs[mask]
     reg = reg[:, mask].permute(1, 0)
-    bb = mask_inds[:, 1:].float().flip(1)
+    bb = mask_inds[:, 1:].type(reg.dtype).flip(1)
     q1 = ((stride * bb + 1) / scale).floor()
     q2 = ((stride * bb + cellsize - 1 + 1) / scale).floor()
     boundingbox = torch.cat([q1, q2, score.unsqueeze(1), reg], dim=1)