|
| 1 | +import cv2 |
| 2 | +import torch |
| 3 | +import numpy as np |
| 4 | + |
def draw_border(img, pt1, pt2, color, thickness, r, d):
    """Draw rounded corner brackets for the rectangle pt1-pt2 onto img.

    Each of the four corners gets a quarter-ellipse arc of radius ``r`` and
    two straight segments of length ``d`` that continue from the arc along
    the rectangle's edges. Drawing is done through ``cv2.line`` and
    ``cv2.ellipse``, which write into ``img``; nothing is returned.

    Args:
        img: Image passed straight to the OpenCV drawing calls.
        pt1: (x, y) of the top-left corner.
        pt2: (x, y) of the bottom-right corner.
        color: Colour forwarded to OpenCV.
        thickness: Line thickness forwarded to OpenCV.
        r: Corner radius.
        d: Length of the straight segment attached to each arc end.
    """
    left, top = pt1
    right, bottom = pt2

    # (corner x, corner y, x step into the box, y step into the box,
    #  ellipse rotation in degrees) for TL, TR, BL, BR in that order.
    corners = (
        (left, top, 1, 1, 180),
        (right, top, -1, 1, 270),
        (left, bottom, 1, -1, 90),
        (right, bottom, -1, -1, 0),
    )

    for corner_x, corner_y, step_x, step_y, rotation in corners:
        arc_x = corner_x + step_x * r
        arc_y = corner_y + step_y * r
        # Horizontal segment, vertical segment, then the arc joining them.
        cv2.line(img, (arc_x, corner_y), (arc_x + step_x * d, corner_y), color, thickness)
        cv2.line(img, (corner_x, arc_y), (corner_x, arc_y + step_y * d), color, thickness)
        cv2.ellipse(img, (arc_x, arc_y), (r, r), rotation, 0, 90, color, thickness)
| 28 | + |
| 29 | + |
def tensor_to_bgr(frame_tensor, *, undo_normalise=False, mean=None, std=None):
    """
    Convert an RGB image tensor scaled to [0, 1] into a uint8 BGR array.

    Args
    ----
    frame_tensor : torch.Tensor
        (3,H,W) or (1,3,H,W) ― float or half ― RGB.
        For a 4-D input only the first batch element is converted.
    undo_normalise : bool
        True if you previously applied (x - mean) / std
    mean, std : list/tuple of 3 floats
        Same numbers you used for normalising (e.g. ImageNet)
    Returns
    -------
    frame_bgr : np.ndarray (H,W,3) uint8 BGR contiguous
    Raises
    ------
    ValueError
        If the tensor is not a 3-channel (C,H,W) image once the batch
        dimension is dropped, or if undo_normalise is set without both
        mean and std.
    """
    # 1) squeeze batch dimension if present
    if frame_tensor.ndim == 4:
        frame_tensor = frame_tensor[0]
    if frame_tensor.ndim != 3 or frame_tensor.shape[0] != 3:
        raise ValueError(
            "Expected a 3-channel (C,H,W) image tensor, got shape "
            f"{tuple(frame_tensor.shape)}"
        )

    # 2) drop autograd history and do the arithmetic in float32, so half
    #    inputs do not lose precision (or hit missing half kernels)
    img = frame_tensor.detach().to(torch.float32)

    # 3) (optional) reverse mean/std normalisation
    if undo_normalise:
        if mean is None or std is None:
            raise ValueError("Supply mean and std to undo normalisation")
        mean = torch.as_tensor(mean, dtype=img.dtype, device=img.device).view(3, 1, 1)
        std = torch.as_tensor(std, dtype=img.dtype, device=img.device).view(3, 1, 1)
        img = img * std + mean

    # 4) scale back to 0-255, round to the nearest level (a bare uint8 cast
    #    truncates and darkens the image), clamp, uint8
    img = (img * 255.0).round().clamp(0, 255).to(torch.uint8)

    # 5) channel-last & numpy, then reverse the channel axis (RGB -> BGR);
    #    ascontiguousarray materialises the reversed view for OpenCV
    rgb = img.permute(1, 2, 0).cpu().numpy()        # H,W,C  RGB
    return np.ascontiguousarray(rgb[:, :, ::-1])    # H,W,C  BGR
| 69 | + |
| 70 | + |
def load_model(model_path):
    """Load a TorchScript module and prepare it for inference.

    The module is read with ``torch.jit.load``, moved to the ``'mps'``
    device and switched to evaluation mode before being returned.

    Args:
        model_path: Location of the serialized TorchScript file.

    Returns:
        The loaded module.
    """
    scripted = torch.jit.load(model_path)
    target_device = torch.device('mps')
    scripted.to(target_device)
    scripted.eval()
    return scripted
| 76 | + |
def frame_to_tensor(frame):
    """Convert a BGR image array into a batched float16 RGB tensor on 'mps'.

    Args:
        frame: H x W x C BGR image array (expected to be uint8, as produced
            by OpenCV capture).

    Returns:
        A 1 x C x H x W ``torch.float16`` tensor on the ``'mps'`` device
        whose values are the RGB pixel values divided by 255.
    """
    # BGR -> RGB, made contiguous so torch can wrap the buffer directly.
    rgb = np.ascontiguousarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Upload the raw pixels first, then rearrange and rescale on the device.
    on_device = torch.from_numpy(rgb).to('mps', non_blocking=True)
    channels_first = on_device.permute(2, 0, 1)               # C x H x W
    unit_range = channels_first.to(torch.float32).div(255.0)  # float32, [0,1]

    # Add the batch dimension and hand back half precision.
    return unit_range.unsqueeze(0).to(torch.float16)          # 1 x C x H x W
| 93 | + |
def model_inference(model, tensor):
    """Run ``model`` on ``tensor`` and divide its output by 255.

    Args:
        model: Callable applied to ``tensor``.
        tensor: Input forwarded unchanged to ``model``.

    Returns:
        The model output divided by 255.0.
    """
    raw_output = model(tensor)
    return raw_output / 255.0
| 96 | + |
| 97 | + |
def _detect_faces(face_cascade, gray):
    """Return detected face boxes, relaxing minNeighbors until one is found."""
    faces = ()
    # Start strict and relax: the first setting that yields any detection wins.
    for min_neighbors in (5, 3, 1, 0):
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.3,
            minNeighbors=min_neighbors,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE,
        )
        if len(faces):
            break
    return faces


def _expand_face_box(box, frame_w, frame_h, x_grow=1.6, y_grow=1.9):
    """Grow a detector box around its centre and fit it inside the frame."""
    x, y, w, h = (int(v) for v in box)

    # Cap the grown size at the frame size first; otherwise
    # ``frame_w - new_w`` goes negative and the slice start ends up < 0.
    new_w = min(int(w * x_grow), frame_w)
    new_h = min(int(h * y_grow), frame_h)

    # Centre the grown box on the detection, then clamp it into the frame.
    new_x = min(max(0, x + w // 2 - new_w // 2), frame_w - new_w)
    new_y = min(max(0, y + h // 2 - new_h // 2), frame_h - new_h)
    return new_x, new_y, new_w, new_h


def _stylise_region(frame, model, box):
    """Replace the boxed region of frame with the model's stylised output."""
    x, y, w, h = box
    face_roi = frame[y:y + h, x:x + w]
    if face_roi.size == 0:
        return

    # Gradients are never used here, so skip autograd bookkeeping.
    with torch.no_grad():
        output_tensor = model_inference(model, frame_to_tensor(face_roi))

    # Crop any excess beyond the ROI size, then paste back using the actual
    # output size so a smaller-than-ROI output cannot cause a shape mismatch.
    output_face = tensor_to_bgr(output_tensor[:, :, 0:h, 0:w])
    out_h, out_w = output_face.shape[:2]
    frame[y:y + out_h, x:x + out_w] = output_face
    # Optional debug overlay:
    # draw_border(frame, (x, y), (x + w, y + h), (255, 255, 127), 2, 20, 20)


def main():
    """Stylise detected faces from the default webcam until 'q' is pressed.

    Loads the frontal-face Haar cascade and the style-transfer model, then
    for every captured frame detects faces, enlarges each box, runs the
    model on that region and writes the result back into the frame before
    showing it. The capture device and windows are released on every exit
    path, including exceptions.
    """
    # Alternatives: haarcascade_profileface, haarcascade_frontalface_default
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        print("Error: Could not load Haar cascade")
        return

    # Start video capture from the default webcam (device 0)
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            print("Error: Could not open video capture")
            return

        # Other exported models that have been tried:
        #   starry_v_bt4_1e10_ep2_float16
        #   nature_oil_painting_ep3_bt4_sw3e10_cw_1e5_float16
        #   starry_ep3_bt4_sw1e11_cw_1e5_float16  <- one of the better ones
        #   nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16
        model = load_model('../models/exports/mps/starry_ep3_bt4_sw1e11_cw_1e5_float16.pt')

        print("Press 'q' to quit")
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to read frame from webcam")
                break

            # The cascade is run on a grayscale copy of the frame.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = _detect_faces(face_cascade, gray)

            height, width = frame.shape[:2]
            for box in faces:
                _stylise_region(frame, model, _expand_face_box(box, width, height))

            cv2.imshow('Webcam Face Detection', frame)

            # Exit loop when 'q' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if model loading or inference raised.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
0 commit comments