DAMS/Merged.py at main · Trident02/DAMS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
import cv2
import dlib
import numpy as np
from collections import deque
import time
import mediapipe as mp

# Load the shape predictor model
predictor_path = 'shape_predictor_68_face_landmarks.dat'
predictor = dlib.shape_predictor(predictor_path)

# Initialize the dlib face detector
detector = dlib.get_frontal_face_detector()

# Initialize variables for pupil detection
pupil_history = []  # List to store the history of pupil positions
num_frames_accumulated = 2  # Number of frames to accumulate for stabilization

# Initialize the deque to store pupil positions for the last 10 seconds
position_history = deque()  # Assuming 10 frames per second


# Function to calculate the eye aspect ratio (EAR)
def eye_aspect_ratio(eye_landmarks):
    # Compute the Euclidean distances between the two sets of vertical eye landmarks (x, y) coordinates
    A = np.linalg.norm(eye_landmarks[1] - eye_landmarks[5])
    B = np.linalg.norm(eye_landmarks[2] - eye_landmarks[4])

    # Compute the Euclidean distance between the horizontal eye landmark (x, y) coordinates
    C = np.linalg.norm(eye_landmarks[0] - eye_landmarks[3])

    # Calculate the eye aspect ratio
    ear = (A + B) / (2.0 * C)
    return ear


# Function to detect and locate pupils
def detect_pupils(image, shape):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Calculate the eye aspect ratio for both eyes
    left_eye_landmarks = np.array(shape[36:42])
    right_eye_landmarks = np.array(shape[42:48])
    left_ear = eye_aspect_ratio(left_eye_landmarks)
    right_ear = eye_aspect_ratio(right_eye_landmarks)

    # Define the thresholds for closed eyes
    ear_threshold = 0.2

    # Check if the eyes are closed or partially closed
    if left_ear < ear_threshold:
        left_eye_landmarks = np.concatenate([left_eye_landmarks, [shape[36]]])
    if right_ear < ear_threshold:
        right_eye_landmarks = np.concatenate([right_eye_landmarks, [shape[42]]])

    # Calculate the eye region of interest (ROI)
    left_eye_roi = cv2.boundingRect(left_eye_landmarks)
    right_eye_roi = cv2.boundingRect(right_eye_landmarks)

    # Get the eye regions from the grayscale image
    left_eye_region = gray[left_eye_roi[1]:left_eye_roi[1] + left_eye_roi[3],
                      left_eye_roi[0]:left_eye_roi[0] + left_eye_roi[2]]
    right_eye_region = gray[right_eye_roi[1]:right_eye_roi[1] + right_eye_roi[3],
                       right_eye_roi[0]:right_eye_roi[0] + right_eye_roi[2]]

    # Apply adaptive thresholding to enhance the pupil/iris contrast
    _, left_eye_thresh = cv2.threshold(left_eye_region, 30, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    _, right_eye_thresh = cv2.threshold(right_eye_region, 30, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

    # Find contours in the thresholded images
    left_eye_contours, _ = cv2.findContours(left_eye_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    right_eye_contours, _ = cv2.findContours(right_eye_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Find the contour with the largest area in each eye region
    if len(left_eye_contours) > 0:
        left_eye_contour = max(left_eye_contours, key=cv2.contourArea)
        ((x, y), left_eye_radius) = cv2.minEnclosingCircle(left_eye_contour)
        left_eye_center = (int(x) + left_eye_roi[0], int(y) + left_eye_roi[1])
    else:
        left_eye_center = (left_eye_roi[0] + left_eye_roi[2] // 2, left_eye_roi[1] + left_eye_roi[3] // 2)

    if len(right_eye_contours) > 0:
        right_eye_contour = max(right_eye_contours, key=cv2.contourArea)
        ((x, y), right_eye_radius) = cv2.minEnclosingCircle(right_eye_contour)
        right_eye_center = (int(x) + right_eye_roi[0], int(y) + right_eye_roi[1])
    else:
        right_eye_center = (right_eye_roi[0] + right_eye_roi[2] // 2, right_eye_roi[1] + right_eye_roi[3] // 2)

    return left_eye_center, right_eye_center


# Function to classify the position of the pupil
def classify_pupil_position(avg_left_eye, avg_right_eye, displacement_threshold):
    left_displacement = avg_left_eye[0] - left_eye[0]
    right_displacement = right_eye[0] - avg_right_eye[0]

    left_eye_landmarks = np.array(shape[36:42])
    right_eye_landmarks = np.array(shape[42:48])


    # Check if the average pupil position is closer to contour points 40 and 46 (left eye)
    if ((np.linalg.norm(right_eye_landmarks[3] - avg_right_eye)) - (np.linalg.norm(avg_right_eye - right_eye_landmarks[0]))) > displacement_threshold:
        return "Right"
    # Check if the average pupil position is closer to contour points 37 and 43 (right eye)
    elif ((np.linalg.norm(avg_right_eye - right_eye_landmarks[0])) - (np.linalg.norm(right_eye_landmarks[3] - avg_right_eye))) > displacement_threshold:
        return "Left"
    else:
        return "Center"

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)

mp_drawing = mp.solutions.drawing_utils

drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

# Open a video capture object to capture video from a webcam (index 0)
cap = cv2.VideoCapture(1)

while cap.isOpened():
    ret, frame = cap.read()  # Read a frame from the video capture

    image = frame.copy()

    # Get the result
    results = face_mesh.process(image)

    img_h, img_w, img_c = image.shape
    face_3d = []
    face_2d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
                    if idx == 1:
                        nose_2d = (lm.x * img_w, lm.y * img_h)
                        nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)

                    x, y = int(lm.x * img_w), int(lm.y * img_h)

                    # Get the 2D Coordinates
                    face_2d.append([x, y])

                    # Get the 3D Coordinates
                    face_3d.append([x, y, lm.z])

            # Convert it to the NumPy array
            face_2d = np.array(face_2d, dtype=np.float64)

            # Convert it to the NumPy array
            face_3d = np.array(face_3d, dtype=np.float64)

            # The camera matrix
            focal_length = 1 * img_w
            cam_matrix = np.array([[focal_length, 0, img_w / 2],
                                   [0, focal_length, img_h / 2],
                                   [0, 0, 1]])

            # The distortion parameters
            dist_matrix = np.zeros((4, 1), dtype=np.float64)

            # Solve PnP
            success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)

            if success:
                # Get rotational matrix
                rmat, jac = cv2.Rodrigues(rot_vec)

                # Get angles
                angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

                # Get the y rotation degree
                x = angles[0] * 360
                y = angles[1] * 360
                z = angles[2] * 360

                # See where the user's head tilting
                if y < -10:
                    text = "Looking Right"
                elif y > 10:
                    text = "Looking Left"
                elif x < -10:
                    text = "Looking Down"
                elif x > 10:
                    text = "Looking Up"
                else:
                    text = "Forward"

                # Display the nose direction
                nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)
                p1 = (int(nose_2d[0]), int(nose_2d[1]))
                p2 = (int(nose_2d[0] + y * 10), int(nose_2d[1] - x * 10))

                cv2.line(image, p1, p2, (255, 0, 0), 3)

                # Add the text on the image
                cv2.putText(image, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)

    if ret:
        # Detect faces in the current frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Convert the frame to grayscale
        faces = detector(gray)  # Use the face detector to detect faces in the grayscale frame

        for face in faces:
            # Determine facial landmarks for the detected face
            shape = predictor(gray, face)
            shape = [(p.x, p.y) for p in shape.parts()]

            # Detect the pupils using the facial landmarks
            left_eye, right_eye = detect_pupils(frame, shape)

            # Accumulate pupil positions for stabilization
            pupil_history.append((left_eye, right_eye))
            if len(pupil_history) > num_frames_accumulated:
                pupil_history.pop(0)

            # Calculate the average pupil position
            avg_left_eye = np.mean(pupil_history, axis=0, dtype=int)[0]
            avg_right_eye = np.mean(pupil_history, axis=0, dtype=int)[1]

            # Calculate the eye aspect ratio for both eyes
            left_eye_landmarks = np.array(shape[36:42])
            right_eye_landmarks = np.array(shape[42:48])
            left_ear = eye_aspect_ratio(left_eye_landmarks)
            right_ear = eye_aspect_ratio(right_eye_landmarks)

            # Check if the eyes are closed
            ear_threshold = 0.2
            if left_ear < ear_threshold or right_ear < ear_threshold:
                # Eyes are closed, skip drawing the rectangle boxes and red pupil marks
                continue

            # Draw circles around the averaged pupil positions
            cv2.circle(frame, tuple(avg_left_eye), 3, (0, 0, 255), -1)
            cv2.circle(frame, tuple(avg_right_eye), 3, (0, 0, 255), -1)

            # Classify the position of the pupil
            displacement_threshold = 8  # Adjust this threshold as needed
            position = classify_pupil_position(avg_left_eye, avg_right_eye, displacement_threshold)
            if position != "Center":
                position_history.append(time.time())
            else:
                position_history.clear()

            # Check if the position has not been "Center" for a long time
            attention_time_threshold = 5  # Time threshold in seconds
            if len(position_history) > 0 and (time.time() - position_history[0]) > attention_time_threshold:
                # Display "Attention!" on the frame
                cv2.putText(frame, "Attention!", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            # Draw the position text on the frame
            cv2.putText(frame, f"Pupil Position: {position}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

    # Display the resulting frame
    cv2.imshow('Pupil Position Detection', frame)
    cv2.imshow('Head Pose Estimation', image)

    # Exit loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture object and close the windows
cap.release()
cv2.destroyAllWindows()