Skip to content

Commit 5295d63

Browse files
committed
Used Vocal Fold extents for mesh extent computation
1 parent 0a8f729 commit 5295d63

4 files changed

Lines changed: 63 additions & 18 deletions

File tree

source/NeuralSegmentation.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ def __init__(self, images, path="assets/model.pth.tar"):
254254

255255
self.generateSegmentationData()
256256

257+
self.vocalfold_extents = None
258+
257259

258260
def class_to_color(self, prediction, class_colors):
259261
prediction = np.expand_dims(prediction, 1)
@@ -267,6 +269,7 @@ def class_to_color(self, prediction, class_colors):
267269

268270
return output
269271

272+
270273
def segmentImage(self, frame):
271274
segmentation = self.model(torch.from_numpy(frame).unsqueeze(0).unsqueeze(0).to(DEVICE).float()).argmax(dim=1).detach().cpu().numpy().squeeze().astype(np.uint8)
272275

@@ -281,6 +284,21 @@ def segmentImage(self, frame):
281284

282285
glottal_roi = np.zeros(segmentation.shape, np.uint8)
283286
x, y, w, h = sorted_stats[-2][1]
287+
288+
if self.vocalfold_extents is None:
289+
self.vocalfold_extents = [x, y, w, h]
290+
else:
291+
px, py, ph, pw = self.vocalfold_extents
292+
293+
# Convert to corner coordinates
294+
x1 = min(px, x)
295+
y1 = min(py, y)
296+
x2 = max(px + pw, x + w)
297+
y2 = max(py + ph, y + h)
298+
299+
# Store union box
300+
self.vocalfold_extents = [x1, y1, x2 - x1, y2 - y1]
301+
284302
glottal_roi[y:y+h, x:x+w] = 1
285303
filtered_glottis = ((segmentation == 2) * 255 * glottal_roi).astype(np.uint8)
286304

@@ -301,22 +319,10 @@ def computeLocalMaxima(self, index, kernelsize=7):
301319

302320
return maxima
303321

304-
def generateROI(self):
305-
minX = 0
306-
maxX = 0
307-
minY = 0
308-
maxY = 0
309-
310-
for laserdotSegmentation in self.laserdotSegmentations:
311-
ys, xs = np.nonzero(laserdotSegmentation)
312322

313-
maxY = np.max(ys)
314-
minY = np.min(ys)
315-
maxX = np.max(xs)
316-
minX = np.min(xs)
317323

318-
319-
return [minX, maxX-minX, minY, maxY-minY]
324+
def generateROI(self):
325+
return self.vocalfold_extents
320326

321327

322328
def estimateClosedGlottis(self):

source/Segmentator.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ def __init__(self, images):
1616
# List of 2x2 points
1717
self.glottalMidlines = list()
1818

19+
self.vocalfoldOutlines = list()
20+
1921
# List of extracted local Maxima
2022
self.localMaxima = list()
2123

@@ -43,6 +45,24 @@ def segmentImageIndex(self, index):
4345
def getSegmentation(self, index):
4446
return self.segmentations[index]
4547

48+
def computeVocalfoldOutline(self, index):
49+
segmentation = self.segmentations[index]
50+
contours, hierarchy = cv2.findContours(segmentation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
51+
52+
i = 0
53+
contour_points = list()
54+
while (i != -1):
55+
contour_points.append(contours[hierarchy[0][i][0]][:, 0, :])
56+
i = hierarchy[0][i][0]
57+
58+
contourArray = None
59+
if len(contour_points) > 1:
60+
contourArray = np.concatenate(contour_points, axis=0)
61+
else:
62+
contourArray = contour_points[0]
63+
return contourArray - np.ones(contourArray.shape)
64+
65+
4666
def computeGlottalOutline(self, index):
4767
segmentation = self.segmentations[index]
4868
contours, hierarchy = cv2.findContours(segmentation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

source/SiliconeSurfaceReconstruction.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,16 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
208208
t = helper.rayPlaneIntersectionMat(centroid, np.expand_dims(planeNormal, 0), np.zeros(glottalCameraRays.shape), glottalCameraRays)
209209
glottalOutlinePoints = t * glottalCameraRays
210210

211+
xy, wh = segmentator.vocalfoldBoundingBox()
212+
mesh_extents = torch.stack([xy, xy+wh]).detach().cpu().numpy()
213+
meshCameraRays = camera.getRayMat(mesh_extents)
214+
t = helper.rayPlaneIntersectionMat(centroid, np.expand_dims(planeNormal, 0), np.zeros(meshCameraRays.shape), meshCameraRays)
215+
meshExtentPoints = t * meshCameraRays
211216

212-
# Project Glottal Midline Extrema into Pointcloud
213-
217+
218+
219+
220+
# Project Glottal Midline Extrema into Pointcloud
214221
upperMidLine, lowerMidLine = segmentator.glottalMidlines()[i]
215222

216223
# Search for the next best midline if the computation didn't work.
@@ -235,6 +242,7 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
235242
glottalOutlinePoints = glottalOutlinePoints - centroid
236243
gml_point1 = np.expand_dims(gml_point1, 0) - centroid
237244
gml_point2 = np.expand_dims(gml_point2, 0) - centroid
245+
meshExtentPoints = meshExtentPoints - centroid
238246

239247
# Compute rotation matrix, aligning the plane normal to the +Y Axis
240248
rotPlane = helper.rotateAlign(planeNormal/np.linalg.norm(planeNormal), np.array([0.0, 1.0, 0.0]))
@@ -244,6 +252,7 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
244252
glottalOutlinePoints = np.matmul(rotPlane, glottalOutlinePoints.T).T
245253
gml_point1 = np.matmul(rotPlane, gml_point1.T).T
246254
gml_point2 = np.matmul(rotPlane, gml_point2.T).T
255+
meshExtentPoints = np.matmul(rotPlane, meshExtentPoints.T).T
247256

248257

249258

@@ -258,6 +267,7 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
258267
gml_point1 = rotateX(gml_point1, -gmplAngle, deg=False)
259268
gml_point2 = rotateX(gml_point2, -gmplAngle, deg=False)
260269
glottalOutlinePoints = rotateX(glottalOutlinePoints, -gmplAngle, deg=False)
270+
meshExtentPoints = rotateX(meshExtentPoints, -gmplAngle, deg=False)
261271

262272

263273
# Move everything, such that the glottal midline lies directly on top of the Z Axis
@@ -266,16 +276,19 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
266276
gml_point1 -= np.array([[0.0, 0.0, zOffset]])
267277
gml_point2 -= np.array([[0.0, 0.0, zOffset]])
268278
glottalOutlinePoints -= np.array([0.0, 0.0, zOffset])
279+
meshExtentPoints -= np.array([0.0, 0.0, zOffset])
269280

270281
# Rotate everything around by 90 degrees again
271282
alignedPoints = rotateX(alignedPoints, -90).astype(np.float)
272283
gml_point1 = rotateX(gml_point1, -90).astype(np.float)
273284
gml_point2 = rotateX(gml_point2, -90).astype(np.float)
274285
glottalOutlinePoints = rotateX(glottalOutlinePoints, -90).astype(np.float)
286+
meshExtentPoints = rotateX(meshExtentPoints, -90).astype(np.float)
275287

276288

277289
# Set Y Values to zero of the glottal outline points
278290
glottalOutlinePoints[:, 1] = 0.0
291+
meshExtentPoints[:, 1] = 0.0
279292

280293
if flip_y:
281294
alignedPoints[:, 1] = -alignedPoints[:, 1]
@@ -294,7 +307,7 @@ def controlPointBasedARAP(triangulatedPoints, camera, segmentator, zSubdivisions
294307

295308
# Find X-Y-Z Extent of Vocalfolds to generate fitting M5 Model
296309
if first:
297-
minX, maxX, minY, maxY, minZ, maxZ = findXYZExtent(aligned)
310+
minX, maxX, minY, maxY, minZ, maxZ = findXYZExtent(meshExtentPoints)
298311
first = False
299312

300313
# Generate M5 Model for left and right vocalfold

source/feature_estimation.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,8 @@ def compute_features(self, video: torch.tensor) -> None:
310310

311311
self._glottal_midlines = []
312312

313+
self._vocalfold_bounding_box = None
314+
313315
num_frames = video.shape[0]
314316
# video_clone = (video.clone().unsqueeze(1).float() / 255).repeat(1, 3, 1, 1)
315317
batch_size = 8
@@ -367,12 +369,16 @@ def compute_features(self, video: torch.tensor) -> None:
367369

368370
glottal_roi = torch.zeros(label.shape, device=labels.device)
369371
x, y, w, h = sorted_stats[-2][1]
372+
373+
if self._vocalfold_bounding_box is None:
374+
self._vocalfold_bounding_box = [torch.tensor([x, y]), torch.tensor([w, h])]
375+
376+
370377
glottal_roi[y:y+h, x:x+w] = 1
371378
wat.append(glottal_roi)
372379
wat = torch.stack(wat)
373380
labels = labels * wat
374381

375-
376382
end_event_nn.record()
377383
self._laserpoint_segmentations[i:i+batch_size] = (labels == 3) * 1
378384

0 commit comments

Comments
 (0)