Point Tracking for Silicone, w/o Specular Highlight Classification

Henningson · Henningson · commit 5b86b8ee0480 · 2025-05-03T18:24:21.000+02:00
diff --git a/source/Correspondences.py b/source/Correspondences.py
@@ -16,6 +16,12 @@ def initialize(laser, camera, maxima_image, minInterval, maxInterval):
             laserRay = laser.ray(y, x)
 
             mask = helper.generateMask(np.zeros_like(maxima_image, dtype=np.uint8), camera.intrinsic(), laser.origin(), laserRay, minInterval, maxInterval,  2, 2)
+
+            vis_mask = np.clip(mask.astype(float) + maxima.astype(float) * 255.0, 0, 255).astype(np.uint8)
+            cv2.imshow("Masks", vis_mask)
+            cv2.waitKey(1)
+
+
             masked_maxima = maxima * mask
             masked_points = masked_maxima.nonzero()
 
diff --git a/source/GUI/MainMenuWidget.py b/source/GUI/MainMenuWidget.py
@@ -34,8 +34,15 @@ def __init__(self, viewer_palette, parent=None):
             "Segmentation",
             [
                 ("Koc et al", "checkbox", False),
-                ("Neural Segmentation", "checkbox", True),
-                ("Silicone Segmentation", "checkbox", False),
+                ("Neural Segmentation", "checkbox", False),
+                ("Silicone Segmentation", "checkbox", True),
+            ],
+        )
+        self.addSubMenu(
+            "Point Tracking",
+            [
+                ("Invivo", "checkbox", False),
+                ("Silicone", "checkbox", True),
             ],
         )
         self.addSubMenu(
@@ -56,6 +63,12 @@ def __init__(self, viewer_palette, parent=None):
              ("Iterations", "field", 2), 
              ("Weight", "field", 10000)],
         )
+        self.addSubMenu(
+            "CUDA",
+            [
+                ("Use", "checkbox", False)
+            ],
+        )
         self.addSubMenu(
             "Least Squares Optimization",
             [("Iterations", "field", 10), ("Learning Rate", "field", 0.1)],
diff --git a/source/GUI/zoomable.py b/source/GUI/zoomable.py
@@ -1,6 +1,7 @@
 from PyQt5.QtCore import Qt
 from PyQt5.QtGui import QImage, QPixmap, QTransform
-from PyQt5.QtWidgets import QGraphicsPixmapItem, QGraphicsScene, QGraphicsView, QMenu
+from PyQt5.QtWidgets import (QGraphicsPixmapItem, QGraphicsScene,
+                             QGraphicsView, QMenu)
 
 
 class Zoomable(QGraphicsView):
@@ -53,6 +54,7 @@ def __init__(self, parent=None):
 
         scene = QGraphicsScene(self)
         self.setScene(scene)
+        self._flipped = False
 
     def wheelEvent(self, event) -> None:
         """
@@ -131,7 +133,11 @@ def update_view(self) -> None:
         """
         Updates the view transformation based on the current zoom level.
         """
-        self.setTransform(QTransform().scale(self._zoom, self._zoom))
+
+        if self._flipped:
+            self.setTransform(QTransform().rotate(90.0).scale(self._zoom, self._zoom))
+        else:
+            self.setTransform(QTransform().scale(self._zoom, self._zoom))
 
     def set_image(self, image: QImage) -> None:
         """
diff --git a/source/GUI/zoomableVideo.py b/source/GUI/zoomableVideo.py
@@ -2,7 +2,7 @@
 
 import zoomable
 from PyQt5 import QtCore
-from PyQt5.QtGui import QImage
+from PyQt5.QtGui import QImage, QTransform
 from PyQt5.QtWidgets import QMenu
 
 
@@ -42,6 +42,13 @@ def add_video(self, video: List[QImage]) -> None:
         self._image_width = video[0].width()
         self._image_height = video[0].height()
 
+        if self._image_width > self._image_height:
+            self._flipped = True
+        else:
+            self._flipped = False
+
+        self.fit_view()
+
     def contextMenuEvent(self, event) -> None:
         """
         Opens a context menu with options for zooming in and out.
diff --git a/source/SiliconeSurfaceReconstruction.py b/source/SiliconeSurfaceReconstruction.py
@@ -150,7 +150,7 @@ def getNeighbours(faces, numPoints):
 # Given
 # 3D Points of type NumFrames X NumPoints x 3
 # Calibrated laser object
-def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions=5, xSubdivisions=3, r_zero = 1.0, T = 2.5, psi = 0.0, ARAP_iterations=2, ARAP_weight = 10000.0):
+def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions=5, xSubdivisions=3, r_zero = 1.0, T = 2.5, psi = 0.0, ARAP_iterations=2, ARAP_weight = 10000.0, flip_y = False):
     left_M5_list = []
     right_M5_list = []
     left_points_list = []
@@ -278,7 +278,8 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
         glottalOutlinePoints[:, 1] = 0.0
 
         # Move vocal folds down a bit
-        alignedPoints[:, 1] = -alignedPoints[:, 1]
+        if flip_y:
+            alignedPoints[:, 1] = -alignedPoints[:, 1]
         alignedPoints -= np.array([[0.0, alignedPoints[:, 1].min()/2.0, 0.0]])
 
         # Split everything into left and right vocal fold
@@ -324,8 +325,8 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
         #left_anchors = generateARAPAnchors(M5_Left, leftPoints)
         #right_anchors = generateARAPAnchors(M5_Right, rightPoints)
         
-        left_anchors, constrained_vertices_left = SurfaceReconstruction.generateARAPAnchors(M5_Left, leftPoints, num_2d, glottalOutlinePoints[np.where(glottalOutlinePoints[:, 0] < 0)], isLeft=True)
-        right_anchors, constrained_vertices_right = SurfaceReconstruction.generateARAPAnchors(M5_Right, rightPoints, num_2d, glottalOutlinePoints[np.where(glottalOutlinePoints[:, 0] >= 0)], isLeft=False)
+        left_anchors, constrained_vertices_left = SurfaceReconstruction.generateARAPAnchors(M5_Left, leftPoints, num_2d, glottalOutlinePoints[np.where(glottalOutlinePoints[:, 0] < 0)], x_subdivisions=xSubdivisions, isLeft=True)
+        right_anchors, constrained_vertices_right = SurfaceReconstruction.generateARAPAnchors(M5_Right, rightPoints, num_2d, glottalOutlinePoints[np.where(glottalOutlinePoints[:, 0] >= 0)], x_subdivisions=xSubdivisions, isLeft=False)
 
         constrained_vertices_list_left.append(constrained_vertices_left.tolist())
         constrained_vertices_list_right.append(constrained_vertices_right.tolist())
diff --git a/source/SurfaceReconstruction.py b/source/SurfaceReconstruction.py
@@ -105,7 +105,7 @@ def rotateZ(mat, degree, deg=True):
     return np.matmul(mat, rotation_matrix)
 
 
-def generateARAPAnchors(vertices, points, nPointsU, glottalOutlinePoints, isLeft=True):
+def generateARAPAnchors(vertices, points, nPointsU, glottalOutlinePoints, x_subdivisions = 2, isLeft=True):
     # We want to first set the lower points of our Control Point Set to be fixed
     lower_indices = np.where(vertices[:, 1] == vertices[vertices[:, 1].argmin(), 1])
     lower_fixed = vertices[lower_indices]
@@ -154,17 +154,19 @@ def generateARAPAnchors(vertices, points, nPointsU, glottalOutlinePoints, isLeft
     # Fit glottal out and midline
     if glottalOutlinePoints.size != 0:
         for i in range(nPointsV):
-            for j in range(3):
-                controlPointIndex = 4 + j + i*nPointsU
+            for j in range(x_subdivisions + 2):
+                controlPointIndex = (x_subdivisions + 2) + j + i*nPointsU
                 controlPoint = vertices[controlPointIndex]
 
                 glottalPoints = glottalOutlinePoints + np.array([[0.0, controlPoint[1], 0.0]])
                 nnIndex, dist = helper.findNearestNeighbour(controlPoint, glottalPoints)
 
+                '''
                 if dist > 3.0:
                     anchors[controlPointIndex] = controlPoint.tolist()
                     continue
-
+                '''
+                    
                 direction = -glottalPoints[nnIndex] + controlPoint
                 direction = direction / np.linalg.norm(direction)
 
diff --git a/source/Viewer.py b/source/Viewer.py
@@ -177,14 +177,10 @@ def __init__(self):
         self.timer_thread.start()
         self.image_timer_thread.start()
 
-        path = "/media/nu94waro/Windows_C/save/datasets/HLEDataset/dataset"
         
+        # (UN)COMMENT THIS TO LOAD HLE DATA AFTER STARTING
         '''
-        self.loadData(
-            "assets/camera_calibration.json",
-            "assets/laser_calibration.json",
-            "assets/example_vid.avi",
-        )'''
+        path = "/media/nu94waro/Windows_C/save/datasets/HLEDataset/dataset"
         
         self.menu_widget.widget().ocs_widget.camera_calib_path = os.path.join(path, "camera_calibration.json")
         self.menu_widget.widget().ocs_widget.laser_calib_path = os.path.join(path, "laser_calibration.json")
@@ -201,7 +197,30 @@ def __init__(self):
             point_tracking.PointTracker(), 
             correspondence_estimation.BruteForceEstimator(10, 30, 40, 100), 
             surface_reconstruction.SurfaceReconstructor())
+        '''
+
+        # (UN)COMMENT THIS TO LOAD SILICONE DATA AFTER STARTING
+
+        path = "assets"
         
+        self.menu_widget.widget().ocs_widget.camera_calib_path = os.path.join(path, "camera_calibration.json")
+        self.menu_widget.widget().ocs_widget.laser_calib_path = os.path.join(path, "laser_calibration.json")
+        self.loadData(
+            "assets/camera_calibration.json",
+            "assets/laser_calibration.json",
+            "assets/example_vid.avi",
+        )
+
+        self._reconstruction_pipeline = reconstruction_pipeline.ReconstructionPipeline(
+            self.camera, 
+            self.laser, 
+            feature_estimation.SiliconeFeatureEstimator(), 
+            point_tracking.SiliconePointTracker(), 
+            correspondence_estimation.BruteForceEstimator(10, 30, 40, 100), 
+            surface_reconstruction.SurfaceReconstructor())
+            
+
+
         '''
         glottal_outline_images = torch.from_numpy(np.load("load/glottal_outline_images.npy")).cuda()
         glottis_segmentations = torch.from_numpy(np.load("load/glottis_segmentations.npy",)).cuda()
@@ -634,8 +653,10 @@ def computeFeatures(self):
         else:
             print("Please choose a Segmentation Algorithm")
 
+        use_cuda = self.menu_widget.widget().getSubmenuValue("CUDA", "Use")
+
         self._reconstruction_pipeline.set_feature_estimator(feature_estimator)
-        self._reconstruction_pipeline.estimate_features(self.video)
+        self._reconstruction_pipeline.estimate_features(self.video if use_cuda else self.video.cpu())
 
         self.graph_widget.updateGraph(
             feature_estimator.glottalAreaWaveform().tolist(), self.graph_widget.glottal_seg_graph
@@ -650,12 +671,27 @@ def computeFeatures(self):
         self.image_widget.point_viewer.add_glottal_midlines(feature_estimator.glottalMidlines())
         self.image_widget.point_viewer.add_glottal_outlines(feature_estimator.glottalOutlines())
 
+        
+        self.update_images_func()
         #self.segmentations = segmentations
         #self.laserdots = segmentations
 
     def trackPoints(self):
-        point_positions = self._reconstruction_pipeline.track_points(self.video)
+        """Track points with the tracker ticked under "Point Tracking"; "Invivo" takes precedence if both are ticked."""
+        use_cuda = self.menu_widget.widget().getSubmenuValue("CUDA", "Use")
+        if self.menu_widget.widget().getSubmenuValue("Point Tracking", "Invivo"):
+            point_tracker = point_tracking.InvivoPointTracker()
+        elif self.menu_widget.widget().getSubmenuValue("Point Tracking", "Silicone"):
+            point_tracker = point_tracking.SiliconePointTracker()
+        else:
+            print("Please choose a Point Tracking Algorithm")
+            return  # nothing ticked: do not hand a None tracker to the pipeline
+
+        self._reconstruction_pipeline.set_point_tracker(point_tracker)
+        # With "CUDA / Use" unticked, the video is passed through .cpu() before tracking.
+        point_positions = self._reconstruction_pipeline.track_points(self.video if use_cuda else self.video.cpu())
+        self.image_widget.point_viewer.add_optimized_points(point_positions.detach().cpu().numpy())
+        self.update_images_func()
 
     def buildCorrespondences(self):
         min_search_space = float(
@@ -738,6 +774,7 @@ def denseShapeEstimation(self):
             psi=psi,
             ARAP_iterations=ARAP_iterations,
             ARAP_weight=ARAP_weight,
+            flip_y=False if self.menu_widget.widget().getSubmenuValue("Point Tracking", "Silicone") else True
         )
 
         self.point_cloud_offsets = [0]
@@ -809,10 +846,6 @@ def denseShapeEstimation(self):
         self.pointcloud_go_mesh_core = self.viewer_widget.get_mesh(self.pointcloud_go_instance_id).mesh_core
 
 
-
-
-
-
         self.controlpoints_offsets = [0]
         self.controlpoints_elements = [len(self.leftDeformed[0]) + len(self.rightDeformed[0])]
 
@@ -823,17 +856,13 @@ def denseShapeEstimation(self):
 
         super_pc_controlpoints = np.concatenate([np.array(self.leftDeformed), np.array(self.rightDeformed)], axis=1)
         super_pc_controlpoints = np.concatenate(super_pc_controlpoints)
-
-        vertex_attributes = {}
-        if not "vertexColor" in vertex_attributes:
-            vertex_attributes["vertexColor"] = np.tile(
-                np.array([0.0, 1.0, 1.0], dtype=np.float32), (super_pc_controlpoints.shape[0], 1)
-            )
-        
+       
         # I'm so sorry for this.
         faces = []
         res_u = zSubdivisions
         res_v = len(self.leftDeformed[0]) // res_u
+        half_verts = len(self.leftDeformed[0])
+        num_verts = 2 * half_verts
         for b in range(len(self.leftDeformed)):
             a = b * num_verts
             for i in range(res_u - 1):
@@ -843,15 +872,15 @@ def denseShapeEstimation(self):
                     p2 = p0 + res_v + 1
                     p3 = p0 + res_v
                     faces.append([p0, p1, p2, p3])  # a quad face
-                    faces.append([len(self.leftDeformed[0]) + p0, len(self.leftDeformed[0]) + p1, len(self.leftDeformed[0]) + p2, len(self.leftDeformed[0]) + p3])
+                    faces.append([half_verts + p0, half_verts + p1, half_verts + p2, half_verts + p3])
 
         core_id = GlMeshCoreId()
         self.viewer_widget.add_mesh_(core_id, super_pc_controlpoints, np.array(faces))
         prefab_id = GlMeshPrefabId(core_id)
         self.viewer_widget.add_mesh_prefab_(
             prefab_id,
             shader="wireframe",
-            vertex_attributes=vertex_attributes,
+            vertex_attributes={},
             face_attributes={},
             uniforms={"lineColor": np.array([0.0, 1.0, 1.0])},
         )
diff --git a/source/correspondence_estimation.py b/source/correspondence_estimation.py
@@ -56,15 +56,15 @@ def compute_correspondences(self, camera, laser, point_image):
         pixelLocations, laserGridIDs = Correspondences.initialize(
                     laser,
                     camera,
-                    point_image.detach().cpu().numpy(),
+                    point_image.detach().cpu().numpy().copy(),
                     self._min_depth,
                     self._max_depth,
                 )
         
         self._correspondences = bruteForceCorrespondence.compute(
             laserGridIDs, 
             pixelLocations, 
-            point_image.detach().cpu().numpy(), 
+            point_image.detach().cpu().numpy().copy(), 
             camera, 
             laser, 
             self._consensus_size, 
diff --git a/source/cv.py b/source/cv.py
@@ -168,6 +168,16 @@ def compute_point_estimates_from_nearest_neighbors(closed_glottis_points: List[t
     return final_point_tensor
 
 
+def compute_pairwise_neighbors(a: torch.tensor, b: torch.tensor) -> torch.tensor:
+    """Return, for each row of a ([N, D]), its nearest row of b ([M, D]) by Euclidean distance; 2-D inputs assumed."""
+    dists = torch.cdist(a, b, p=2)  # pairwise Euclidean distances, [N, M]
+    # Find index of nearest neighbor in M for each point in N
+    nearest_indices = dists.argmin(dim=1)  # [N]
+    # Gather nearest neighbors: one row of b per row of a
+    nearest_points = b[nearest_indices]  # [N, D] (the original note says [N, 2], i.e. 2D points — TODO confirm)
+    return nearest_points
+
+
 def fill_nans_in_point_timeseries(neighbors_over_time: torch.tensor) -> torch.tensor:
     a = 1
     return None
diff --git a/source/feature_estimation.py b/source/feature_estimation.py
@@ -148,6 +148,53 @@ def compute_local_maxima(self, image, kernelsize=7):
         kernel = torch.ones((kernelsize, kernelsize), device=image.device)
         kernel[math.floor(kernelsize // 2), math.floor(kernelsize // 2)] = 0.0
         maxima = (image > kornia.morphology.dilation(image.unsqueeze(0).unsqueeze(0).float(), kernel)).squeeze()
+        '''
+        maxima_locations = maxima.nonzero()
+        maxima_locations = torch.concat([torch.zeros_like(maxima_locations)[:, :1], maxima_locations])
+
+        crops, y_windows, x_windows = cv.extract_windows_from_batch(image.unsqueeze(0), maxima_locations, device=image.device)
+        per_crop_max = crops.amax([-1, -2], keepdim=True)
+        per_crop_min = crops.amin([-1, -2], keepdim=True)
+
+        normalized_crops = (crops - per_crop_min) / (per_crop_max - per_crop_min)
+
+        new_points = []
+
+        for index, crop in enumerate(normalized_crops):
+            local_maximum = cv.unravel_index(
+                torch.argmax(crop[1:-1, 1:-1]), [5, 5]
+            )
+
+            # Add one again, since we removed the border from the local maximum lookup
+            x0, y0 = local_maximum[1] + 1, local_maximum[0] + 1
+
+            # Get 3x3 subwindow from crop, where the local maximum is centered.
+            neighborhood = 1
+            x_min = max(0, x0 - neighborhood)
+            x_max = min(crop.shape[1], x0 + neighborhood + 1)
+            y_min = max(0, y0 - neighborhood)
+            y_max = min(crop.shape[0], y0 + neighborhood + 1)
+
+            sub_image = crop[y_min:y_max, x_min:x_max]
+            sub_image = (sub_image - sub_image.min()) / (
+                sub_image.max() - sub_image.min()
+            )
+
+            centroids = cv.moment_method(
+                sub_image.unsqueeze(0)
+            ).squeeze()
+
+            refined_x = (
+                x_windows[index, 0, 0, 0] + centroids[0] + x0 - 1
+            ).item()
+            refined_y = (
+                y_windows[index, 0, 0, 0] + centroids[1] + y0 - 1
+            ).item()
+
+            new_points.append(torch.tensor([refined_y, refined_x], device=image.device, dtype=torch.float32))
+
+        '''
+
         return maxima, maxima.nonzero()
 
 
diff --git a/source/helper.py b/source/helper.py
diff --git a/source/point_tracking.py b/source/point_tracking.py
diff --git a/source/reconstruction_pipeline.py b/source/reconstruction_pipeline.py