Add segmented style-transfer demo and OpenCV test code for calling from Chapel (didn't work).

Iainmon · Iainmon · commit 9474863e27cd · 2025-05-15T22:25:42.000-07:00
diff --git a/demos/video/style-transfer/build.sh b/demos/video/style-transfer/build.sh
@@ -3,5 +3,7 @@
 
 # /usr/bin/clang++ -std=c++20 -c -fPIC mirror.cpp -o mirror.o $(pkg-config --cflags --libs opencv4)
 
-g++ -std=c++20 -c -fPIC mirror.cpp -o mirror.o $(pkg-config --cflags opencv4)
-chpl mirror.h mirror.o mirror.chpl --fast --print-commands --ldflags $(pkg-config --cflags --libs opencv4) -lstdc++
+SDL2LIBS="-I/opt/homebrew/include -I/opt/homebrew/include/SDL2 -L/opt/homebrew/lib -lSDL2"
+
+g++ -std=c++20 -c -fPIC mirror.cpp -o mirror.o $(pkg-config --cflags opencv4) $(pkg-config --cflags sdl2)
+chpl mirror.h mirror.o mirror.chpl --print-commands --ldflags $(pkg-config --cflags --libs opencv4) -lstdc++
diff --git a/demos/video/style-transfer/face-recognition/face.py b/demos/video/style-transfer/face-recognition/face.py
@@ -0,0 +1,188 @@
+import cv2
+import torch
+import numpy as np
+
+def draw_border(img, pt1, pt2, color, thickness, r, d):
+    x1,y1 = pt1
+    x2,y2 = pt2
+
+    # Top left
+    cv2.line(img, (x1 + r, y1), (x1 + r + d, y1), color, thickness)
+    cv2.line(img, (x1, y1 + r), (x1, y1 + r + d), color, thickness)
+    cv2.ellipse(img, (x1 + r, y1 + r), (r, r), 180, 0, 90, color, thickness)
+
+    # Top right
+    cv2.line(img, (x2 - r, y1), (x2 - r - d, y1), color, thickness)
+    cv2.line(img, (x2, y1 + r), (x2, y1 + r + d), color, thickness)
+    cv2.ellipse(img, (x2 - r, y1 + r), (r, r), 270, 0, 90, color, thickness)
+
+    # Bottom left
+    cv2.line(img, (x1 + r, y2), (x1 + r + d, y2), color, thickness)
+    cv2.line(img, (x1, y2 - r), (x1, y2 - r - d), color, thickness)
+    cv2.ellipse(img, (x1 + r, y2 - r), (r, r), 90, 0, 90, color, thickness)
+
+    # Bottom right
+    cv2.line(img, (x2 - r, y2), (x2 - r - d, y2), color, thickness)
+    cv2.line(img, (x2, y2 - r), (x2, y2 - r - d), color, thickness)
+    cv2.ellipse(img, (x2 - r, y2 - r), (r, r), 0, 0, 90, color, thickness)
+
+
+def tensor_to_bgr(frame_tensor, *, undo_normalise=False, mean=None, std=None):
+    """Convert an RGB image tensor into a BGR uint8 numpy array.
+    Args
+    ----
+    frame_tensor : torch.Tensor
+        (C,H,W) or (1,C,H,W)   ―  float or half   ―  RGB
+    undo_normalise : bool
+        True if you previously applied (x - mean) / std
+    mean, std : list/tuple of 3 floats
+        Same numbers you used for normalising (e.g. ImageNet)
+    Returns
+    -------
+    frame_bgr : np.ndarray   (H,W,3) uint8   BGR  contiguous
+    """
+    # 1) drop the batch dimension if present (only the first item is kept)
+    if frame_tensor.ndim == 4:
+        frame_tensor = frame_tensor[0]
+
+    # 2) detach from the autograd graph (device and dtype are left unchanged)
+    img = frame_tensor.detach()
+
+    # 3) (optional) reverse mean/std normalisation
+    if undo_normalise:
+        if mean is None or std is None:
+            raise ValueError("Supply mean and std to undo normalisation")
+        mean = torch.tensor(mean).to(img).view(3,1,1)
+        std  = torch.tensor(std).to(img).view(3,1,1)
+        img = img * std + mean
+
+    # 4) scale back to 0-255, clamp, convert to uint8
+    img = (img * 255.0)
+    # img = img # .to(torch.float16)
+    img = img.clamp(0,255).byte()
+
+    # 5) channel-last, copy to CPU, convert to numpy
+    img = img.permute(1,2,0).cpu().numpy()                 # H,W,C  RGB
+    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)       # -> BGR
+    img = np.ascontiguousarray(img)                  # contiguous memory for OpenCV
+    return img
+
+
+def load_model(model_path):
+    model = torch.jit.load(model_path)
+    model.to(torch.device('mps'))
+    model.eval()
+    return model
+
+def frame_to_tensor(frame):  # BGR H x W x C frame -> 1 x C x H x W float16 tensor on 'mps', divided by 255
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    # 1) Ensure the array is contiguous before handing it to torch ---------------
+    frame = np.ascontiguousarray(frame_rgb)
+
+    # 2) numpy -> torch, move to the 'mps' device, reorder channels, scale -------
+    tensor = torch.from_numpy(frame)     # H x W x C, dtype follows the input array
+    tensor = tensor.to('mps', non_blocking=True)
+    
+    tensor = tensor.permute(2, 0, 1)         # C x H x W
+    tensor = tensor.to(torch.float32).div(255.0)       # float32, divided by 255
+
+    # 3) Add a batch dimension and return as float16 ------------------------------
+    tensor = tensor.unsqueeze(0)             # 1 x C x H x W
+    return tensor.to(torch.float16)
+
+def model_inference(model, tensor):  # NOTE(review): presumably the model emits 0-255 values; tensor_to_bgr multiplies by 255 again - confirm
+    return model(tensor) / 255.0
+
+
+def main():
+    # Load pre-trained Haar cascade classifier for frontal face detection
+    # haarcascade_profileface
+    # haarcascade_frontalface_default
+    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+
+    # Start video capture from the default webcam (device 0)
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        print("Error: Could not open video capture")
+        return
+    
+    # starry_v_bt4_1e10_ep2_float16
+    # nature_oil_painting_ep3_bt4_sw3e10_cw_1e5_float16
+    # starry_ep3_bt4_sw1e11_cw_1e5_float16 <- one of the better ones
+    # nature_oil_painting_ep4_bt4_sw1e10_cw_1e5_float16
+    model = load_model('../models/exports/mps/starry_ep3_bt4_sw1e11_cw_1e5_float16.pt')
+
+    print("Press 'q' to quit")
+    while True:
+        # Read a frame from the webcam
+        ret, frame = cap.read()
+        if not ret:
+            print("Error: Failed to read frame from webcam")
+            break
+
+        # Convert the frame to grayscale (face detector expects gray images)
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+        # Detect faces in the grayscale image
+        for n in [5,3,1,0]:
+            faces = face_cascade.detectMultiScale(
+                gray,
+                scaleFactor=1.3,
+                minNeighbors=n,
+                minSize=(30, 30),
+                flags=cv2.CASCADE_SCALE_IMAGE
+            )
+            if len(faces):
+                break
+
+        x_grow = 1.6
+        y_grow = 1.9
+        height, width, channels = frame.shape
+
+        face_bounds = []
+        for (i, (x, y, w, h)) in enumerate(faces):
+            # Calculate the center of the face
+            center_x = x + w // 2
+            center_y = y + h // 2
+
+            # Calculate the new width and height
+            new_w = int(w * x_grow)
+            new_h = int(h * y_grow)
+
+            # Calculate the new top-left corner
+            new_x = max(0, center_x - new_w // 2)
+            new_y = max(0, center_y - new_h // 2)
+
+            # Ensure the new bounding box is within the image boundaries
+            new_x = min(new_x, width - new_w)
+            new_y = min(new_y, height - new_h)
+
+            face_bounds.append((new_x, new_y, new_w, new_h))
+
+        # Draw bounding boxes around detected faces
+        for (x, y, w, h) in face_bounds:
+            face_roi = frame[y:y+h, x:x+w]
+            # Apply style transfer to the face region
+            input_tensor = frame_to_tensor(face_roi)
+            output_tensor = model_inference(model, input_tensor)[:,:,0:h, 0:w]
+            # print(output_tensor.shape)
+            output_face = tensor_to_bgr(output_tensor)
+            # Replace the face region in the original frame with the stylized face
+            frame[y:y+h, x:x+w] = output_face # output_face[:h, :w]
+            # draw_border(frame, (x, y), (x + w, y + h), (255,255,127), 2, 20, 20)
+            # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+
+        # Display the resulting frame
+        cv2.imshow('Webcam Face Detection', frame)
+
+        # Exit loop when 'q' key is pressed
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    # Release resources
+    cap.release()
+    cv2.destroyAllWindows()
+
+if __name__ == '__main__':
+    main()
diff --git a/demos/video/style-transfer/mirror.chpl b/demos/video/style-transfer/mirror.chpl
@@ -12,6 +12,9 @@ use CTypes;
 
 extern proc run_mirror(): int;
 
+extern record cvVideoCapture {}
+
+extern proc get_video_capture(): cvVideoCapture;
 
 proc main(args: [] string) {
     writeln("Hello, world!");
diff --git a/demos/video/style-transfer/mirror.cpp b/demos/video/style-transfer/mirror.cpp
@@ -1,62 +1,156 @@
 #include "mirror.h"
-
+// #include <SDL2/SDL.h>
 #include <opencv2/opencv.hpp>
 #include <iostream>
 
+
+
+// Shows frames from cap in windowName until a blank frame or a key press, then releases cap and destroys all windows.
+void displayMirror(cv::VideoCapture &cap, const std::string& windowName) {
+    cv::Mat frame;
+    while (true) {
+        // Capture a new frame from the camera
+        cap >> frame;
+        if (frame.empty()) {
+            std::cout << "Error: Blank frame grabbed" << std::endl;
+            break;
+        }
+
+        // Show the frame in the window
+        cv::imshow(windowName, frame);
+
+        // Wait for 30ms. Exit if any key is pressed.
+        if (cv::waitKey(30) >= 0) {
+            std::cout << "Key pressed, exiting..." << std::endl;
+            break;
+        }
+    }
+    cap.release();
+    cv::destroyAllWindows();
+}
+
+
+// int displayMirrorLoopSDL() {
+//     if (SDL_Init(SDL_INIT_VIDEO) < 0) {
+//         SDL_Log("Could not initialize SDL: %s", SDL_GetError());
+//         return -1;
+//     }
+
+//     // 2) Open default webcam via OpenCV
+//     cv::VideoCapture cap(0);
+//     if (!cap.isOpened()) {
+//         SDL_Log("Could not open webcam");
+//         SDL_Quit();
+//         return -1;
+//     }
+
+//     // Get camera resolution
+//     int w = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
+//     int h = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
+
+//     // 3) Create a borderless SDL2 window
+//     SDL_Window* window = SDL_CreateWindow(
+//         "Webcam",
+//         SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
+//         w, h,
+//         SDL_WINDOW_BORDERLESS | SDL_WINDOW_SHOWN
+//     );
+
+//     SDL_Renderer* renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
+//     // Create a streaming texture in RGB24 format
+//     SDL_Texture* texture = SDL_CreateTexture(
+//         renderer,
+//         SDL_PIXELFORMAT_RGB24,
+//         SDL_TEXTUREACCESS_STREAMING,
+//         w, h
+//     );
+
+//     // 4) Main loop: grab frame, convert, update texture, render
+//     bool running = true;
+//     SDL_Event ev;
+//     while (running) {
+//         // Handle events
+//         while (SDL_PollEvent(&ev)) {
+//             if (ev.type == SDL_QUIT) {
+//                 running = false;
+//             }
+//         }
+
+//         // Grab frame (BGR), convert to RGB
+//         cv::Mat frame;
+//         cap >> frame;
+//         if (frame.empty()) break;
+//         cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
+
+//         // Update SDL texture with the raw pixel data
+//         SDL_UpdateTexture(texture, nullptr, frame.data, frame.step);
+
+//         // Render it
+//         SDL_RenderClear(renderer);
+//         SDL_RenderCopy(renderer, texture, nullptr, nullptr);
+//         SDL_RenderPresent(renderer);
+//     }
+
+//     // Cleanup
+//     SDL_DestroyTexture(texture);
+//     SDL_DestroyRenderer(renderer);
+//     SDL_DestroyWindow(window);
+//     SDL_Quit();
+//     return 0;
+// }
+
 // extern "C" void run_mirror() asm ("run_mirror");
 extern "C" int run_mirror(void) {
-    // cv::VideoCapture cap(0); // Open the default camera (0)
+
+    // return displayMirrorLoopSDL();
+
+    // cv::VideoCapture cap(0); // Open the default camera
     // if (!cap.isOpened()) {
-    //     std::cerr << "Error: Could not open camera." << std::endl;
+    //     std::cerr << "Error: Could not open camera" << std::endl;
+    //     return -1;
     // }
 
-    // cv::Mat frame;
-    // while (true) {
-    //     cap >> frame; // Capture a new frame
-    //     if (frame.empty()) {
-    //         std::cerr << "Error: Could not capture frame." << std::endl;
-    //         break;
-    //     }
+    // // Create a window to display the video
+    // const std::string windowName = "Mirror";
+    // cv::namedWindow(windowName, cv::WINDOW_AUTOSIZE);
 
-    //     cv::imshow("Webcam", frame); // Display the captured frame
-    //     if (cv::waitKey(30) >= 0) break; // Exit on any key press
-    // }
+    // // // Start displaying the video
+    // displayMirror(cap, windowName);
+
+    // return 0;
 
-    // cap.release(); // Release the camera
-    // cv::destroyAllWindows(); // Close all OpenCV windows
 
-    cv::VideoCapture cap(0);
+        cv::VideoCapture cap(0); // Open the default camera
     if (!cap.isOpened()) {
-        std::cerr << "Error: Could not open webcam" << std::endl;
+        std::cerr << "Error: Could not open camera" << std::endl;
         return -1;
     }
 
-    cv::Mat frame;
-    const std::string window_name = "Webcam";
-
     // Create a window to display the video
-    cv::namedWindow(window_name, cv::WINDOW_AUTOSIZE);
+    const std::string windowName = "Mirror";
+    cv::namedWindow(windowName, cv::WINDOW_AUTOSIZE);
 
+    cv::Mat frame;
     while (true) {
         // Capture a new frame from the camera
         cap >> frame;
         if (frame.empty()) {
-            std::cerr << "Error: Blank frame grabbed" << std::endl;
+            std::cout << "Error: Blank frame grabbed" << std::endl;
             break;
         }
 
         // Show the frame in the window
-        cv::imshow(window_name, frame);
+        cv::imshow(windowName, frame);
+
 
         // Wait for 30ms. Exit if any key is pressed.
-        if (cv::waitKey(30) >= 0) break;
+        if (cv::waitKey(30) >= 0) {
+            std::cout << "Key pressed, exiting..." << std::endl;
+            break;
+        }
     }
-
-    // Release the camera and destroy the window
     cap.release();
     cv::destroyAllWindows();
-
     return 0;
-
 }
 
diff --git a/demos/video/style-transfer/mirror.h b/demos/video/style-transfer/mirror.h
diff --git a/demos/video/style-transfer/mirror.o b/demos/video/style-transfer/mirror.o