#####################################################################

# Task 5 : run a machine learning based invisibility cloak demo using:
#          (a) trained object detection for person detection
#          (b) feathered blending for compositing (as before)

#####################################################################

import cv2
import numpy as np

#####################################################################

# define video capture with access to camera 0

camera = cv2.VideoCapture(0, cv2.CAP_V4L)

# fail early with a clear message if the camera could not be opened
# (otherwise the first read() yields no frame and the script fails later
# with an unrelated-looking error when accessing background.shape)

if not camera.isOpened():
    raise SystemExit("error: unable to open camera 0")

# define display window

window_name = "Live Camera Input with Invisibility Cloaking"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

#####################################################################

# first, capture one frame to use as the stored background image,
# checking that the camera actually delivered a frame

grabbed, background = camera.read()
if not grabbed or background is None:
    camera.release()
    raise SystemExit("error: unable to read initial background frame")

# record the frame dimensions and show the captured background

height, width, _ = background.shape
cv2.imshow("Current Background", background)

##########################################################################

# initialise the object detection neural network (uses Mask R-CNN)

# load configuration and weight files for the Mask R-CNN model

# N.B. cv2.dnn.readNet() identifies the model and configuration files by
# their extensions, so the .pbtxt / .pb argument order used here is accepted

net = cv2.dnn.readNet("mask_rcnn_inception_v2_coco_2018_01_28.pbtxt",
                      "mask_rcnn_inception_v2_coco_2018_01_28/"
                      "frozen_inference_graph.pb")

# load names of object classes (types) from file - one class name per line,
# trailing newline(s) at the end of the file are ignored

classesFile = "object_detection_classes_coco.txt"
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# set up compute target as one of [GPU, OpenCL, CPU] - uncomment as needed

net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)

# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

#####################################################################

# set up an array of colours in order to draw detected object masks

# fixed seed so that the same per-class colours are produced on every run

np.random.seed(324)

# build one colour per class: each colour is the average of the previous
# colour (starting from black) and a freshly drawn random colour

colors = []
previous_colour = np.array([0, 0, 0], np.uint8)
for _ in range(len(classes)):
    random_colour = np.random.randint(0, 256, [3], np.uint8)
    previous_colour = (previous_colour + random_colour) / 2
    colors.append(previous_colour)

#####################################################################

# main processing loop

# loop state: keep_processing is cleared on exit request ("x" / ESC);
# do_invisibility is toggled with "i" and selects cloaking vs. mask overlay

keep_processing = True
do_invisibility = False

while (keep_processing):

    # read an image from the camera - stop cleanly if no frame is available
    # rather than failing on image.shape below

    grabbed, image = camera.read()
    if not grabbed or image is None:
        print("\n -- no frame received from camera, exiting.")
        break

    # get image dimensions

    height = image.shape[0]
    width = image.shape[1]

    # set up a foreground mask image (all zeros == black)

    foreground_mask = np.zeros((height, width, 1), np.uint8)

    # create a 4D tensor (OpenCV 'blob') from image frame
    # (N.B. technical aspect: pixels not scaled, image resized)

    tensor = cv2.dnn.blobFromImage(
                image, 1.0, (800, 800), [0, 0, 0],
                swapRB=True, crop=False)

    # set the input to the CNN network

    net.setInput(tensor)

    # runs forward inference to get object boxes and masks from the
    # two named output layers

    boxes, masks = net.forward(['detection_out_final', 'detection_masks'])

    # get number of objects detected

    numDetections = boxes.shape[2]

    # draw segmentation - draw instance segments

    for i in range(numDetections):
        box = boxes[0, 0, i]
        instance_masks = masks[i]
        confidence = box[2]
        if confidence > 0.5:

            # **** get object info: type, bounding box
            # (box coordinates are scaled by the image width / height)

            classId = int(box[1])
            left = int(width * box[3])
            top = int(height * box[4])
            right = int(width * box[5])
            bottom = int(height * box[6])

            # **** clamp bounding box to lie inside the image width/height

            left = max(0, min(left, width - 1))
            top = max(0, min(top, height - 1))
            right = max(0, min(right, width - 1))
            bottom = max(0, min(bottom, height - 1))

            # **** draw object instance mask

            # get the mask for this class, re-size from the network output
            # size to the bounding box size in the image, then threshold
            # the mask at 0.5
            # (N.B. interpolation must be passed by keyword - the third
            # positional parameter of cv2.resize() is dst, not the
            # interpolation flag)

            classMask = instance_masks[classId]
            classMask = cv2.resize(classMask,
                                   (right - left + 1, bottom - top + 1),
                                   interpolation=cv2.INTER_CUBIC)
            object_mask = (classMask > 0.5)

            # if invisibility is ON, draw objects into foreground mask
            # otherwise draw them as coloured overlays on the camera image

            if (do_invisibility):
                foreground_mask[top:bottom+1,
                                left:right+1][object_mask] = 255
            else:

                # blend 80% class colour with 20% of the original pixels

                roi = image[top:bottom+1, left:right+1][object_mask]
                image[top:bottom+1, left:right+1][object_mask] = (
                    0.8 * colors[classId] + 0.2 * roi).astype(np.uint8)

    if (do_invisibility):

        # all as per earlier Task 3 and Task 4 code

        # perform morphological opening and dilation on the foreground mask

        foreground_mask_morphed = cv2.morphologyEx(foreground_mask,
                                                   cv2.MORPH_OPEN,
                                                   np.ones((3, 3), np.uint8),
                                                   iterations=5)
        foreground_mask_morphed = cv2.morphologyEx(foreground_mask_morphed,
                                                   cv2.MORPH_DILATE,
                                                   np.ones((3, 3), np.uint8),
                                                   iterations=5)

        # extract the set of contours around the foreground mask and then the
        # convex hull around that set of contours. Update the foreground mask
        # with the convex hull of all the pixels in the region

        contours, _ = cv2.findContours(foreground_mask_morphed,
                                       cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        if (len(contours) > 0):
            hull = cv2.convexHull(np.vstack(contours))
            cv2.fillPoly(foreground_mask_morphed, [hull], (255, 255, 255))

        # logically invert foreground mask to get the background mask via NOT

        background_mask = cv2.bitwise_not(foreground_mask_morphed)

        # construct 3-channel feathered foreground / background masks for
        # blending: blur the mask edges and scale the values by 1/255

        foreground_mask_feathered = cv2.blur(foreground_mask_morphed,
                                             (15, 15)) / 255.0
        background_mask_feathered = cv2.blur(background_mask, (15, 15)) / 255.0
        background_mask_feathered = cv2.merge([background_mask_feathered,
                                               background_mask_feathered,
                                               background_mask_feathered])
        foreground_mask_feathered = cv2.merge([foreground_mask_feathered,
                                               foreground_mask_feathered,
                                               foreground_mask_feathered])

        # combine current image with cloaked region via feathered blending:
        # live image weighted by the background mask plus the stored
        # background weighted by the foreground mask

        cloaked_image = ((background_mask_feathered * image) +
                         (foreground_mask_feathered * background)
                         ).astype('uint8')

        # display image with cloaking present

        cv2.imshow(window_name, cloaked_image)

    else:

        # display image with just object masks present

        cv2.imshow(window_name, image)

    # start the event loop - if user presses "x" or ESC then exit
    # wait just 2ms for a key press (as processing here is slower)

    key = cv2.waitKey(2) & 0xFF

    if (key == ord('x') or key == ord('\x1b')):
        keep_processing = False

    # - if user presses 'i' then turn on/off invisibility

    elif (key == ord('i')):

        do_invisibility = not (do_invisibility)

    # - if user presses "f" then switch to fullscreen

    elif (key == ord('f')):
        print("\n -- toggle fullscreen.")
        last_fs = cv2.getWindowProperty(window_name,
                                        cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
                              cv2.WINDOW_FULLSCREEN &
                              ~(int(last_fs)))

    # - if user presses space then reset background
    #   (the previous background is kept if no new frame could be read)

    elif (key == ord(' ')):
        print("\n -- reset of background image.")
        grabbed, new_background = camera.read()
        if grabbed and new_background is not None:
            background = new_background
            cv2.imshow("Current Background", background)

# release the camera and close all display windows on exit

camera.release()
cv2.destroyAllWindows()

#####################################################################

# Author : Toby Breckon
# Copyright (c) 2022-25 Dept Computer Science, Durham University, UK

#####################################################################