Skip to content

Commit 5c2bd84

Browse files
Borda and codex committed
feat: add RF-DETR keypoint uncertainty visualization
Co-authored-by: Codex <codex@openai.com>
1 parent 89e0427 commit 5c2bd84

5 files changed

Lines changed: 477 additions & 1 deletion

File tree

src/supervision/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
from supervision.key_points.annotators import (
122122
EdgeAnnotator,
123123
VertexAnnotator,
124+
VertexEllipseAnnotator,
124125
VertexLabelAnnotator,
125126
)
126127
from supervision.key_points.core import KeyPoints
@@ -204,6 +205,7 @@
204205
"TraceAnnotator",
205206
"TriangleAnnotator",
206207
"VertexAnnotator",
208+
"VertexEllipseAnnotator",
207209
"VertexLabelAnnotator",
208210
"VideoInfo",
209211
"VideoSink",

src/supervision/key_points/annotators.py

Lines changed: 188 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from abc import ABC, abstractmethod
44
from collections.abc import Sequence
5-
from typing import Any
5+
from typing import Any, Literal
66

77
import cv2
88
import numpy as np
@@ -192,6 +192,193 @@ def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:
192192
return scene
193193

194194

195+
class VertexEllipseAnnotator(BaseKeyPointAnnotator):
    """
    A class that draws covariance ellipses around skeleton vertices.

    The annotator expects per-keypoint covariance matrices stored in
    ``key_points.data[covariance_data_key]`` with shape ``(N, K, 2, 2)`` in pixel
    coordinates, where ``N`` is the number of keypoint sets and ``K`` is the
    number of vertices per set.

    Keypoints that cannot be drawn are skipped silently rather than raising:
    the ``(0, 0)`` sentinel position, non-finite coordinates, confidence below
    the threshold, and covariances that are non-finite or not positive
    definite.
    """

    def __init__(
        self,
        color: Color = Color.ROBOFLOW,
        thickness: int = 2,
        sigma: float = 2.0,
        covariance_data_key: str = "covariance",
        confidence_threshold: float = 0.0,
        max_axis_length: float | None = None,
        line_style: Literal["solid", "dashed"] = "solid",
        dash_length: int = 16,
    ) -> None:
        """
        Args:
            color: The color to use for covariance ellipses.
            thickness: The line thickness used to draw the ellipses.
            sigma: Number of standard deviations represented by the ellipse axes.
            covariance_data_key: Key in ``key_points.data`` containing covariance
                matrices with shape ``(N, K, 2, 2)``.
            confidence_threshold: Minimum keypoint confidence required for drawing.
                Ignored when ``key_points.confidence`` is ``None``.
            max_axis_length: Optional cap for ellipse semi-axis lengths in pixels.
            line_style: Ellipse line style. Use ``"dashed"`` for less visually
                dominant uncertainty overlays.
            dash_length: Arc length in degrees for each dashed segment. Only used
                when ``line_style="dashed"``.

        Raises:
            ValueError: If ``sigma``, ``thickness``, ``dash_length`` or a provided
                ``max_axis_length`` is not positive, or if ``line_style`` is not
                one of ``"solid"`` / ``"dashed"``.
        """
        if sigma <= 0:
            raise ValueError("sigma must be positive")
        if thickness <= 0:
            raise ValueError("thickness must be positive")
        if max_axis_length is not None and max_axis_length <= 0:
            raise ValueError("max_axis_length must be positive when provided")
        if line_style not in {"solid", "dashed"}:
            raise ValueError("line_style must be 'solid' or 'dashed'")
        if dash_length <= 0:
            raise ValueError("dash_length must be positive")

        self.color = color
        self.thickness = thickness
        self.sigma = sigma
        self.covariance_data_key = covariance_data_key
        self.confidence_threshold = confidence_threshold
        self.max_axis_length = max_axis_length
        self.line_style = line_style
        self.dash_length = dash_length

    @ensure_cv2_image_for_class_method
    def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:
        """
        Annotates the given scene with covariance ellipses around keypoints.

        Args:
            scene: The image where covariance ellipses will be drawn. ``ImageType``
                accepts either ``numpy.ndarray`` or ``PIL.Image.Image``.
            key_points: A collection of key points. Covariance matrices must be
                stored in ``key_points.data[covariance_data_key]``.

        Returns:
            The annotated image, matching the type of ``scene``.

        Raises:
            ValueError: If the covariance data is missing or does not have shape
                ``(N, K, 2, 2)``.

        Example:
            ```pycon
            >>> import numpy as np
            >>> import supervision as sv
            >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
            >>> key_points = sv.KeyPoints(
            ...     xy=np.array([[[50, 50], [60, 60]]], dtype=np.float32),
            ...     data={"covariance": np.array([[[[25, 0], [0, 9]], [[9, 0], [0, 4]]]], dtype=np.float32)}
            ... )
            >>> annotator = sv.VertexEllipseAnnotator(color=sv.Color.GREEN)
            >>> annotated_frame = annotator.annotate(image.copy(), key_points)
            >>> annotated_frame.shape
            (100, 100, 3)

            ```
        """  # noqa: E501 // docs
        assert isinstance(scene, np.ndarray)
        if len(key_points) == 0:
            return scene

        covariances = self._get_covariances(key_points=key_points)
        for detection_index, xy in enumerate(key_points.xy):
            for point_index, (x, y) in enumerate(xy):
                # NaN/inf coordinates cannot be converted to a pixel center:
                # round() would raise and abort the whole annotation. Skip the
                # keypoint, consistent with how invalid covariances are handled.
                if not (np.isfinite(x) and np.isfinite(y)):
                    continue
                # A keypoint at (0, 0) is treated as "not present".
                if np.allclose((x, y), 0):
                    continue
                if key_points.confidence is not None:
                    confidence = key_points.confidence[detection_index, point_index]
                    if confidence < self.confidence_threshold:
                        continue
                ellipse = self._covariance_to_ellipse(
                    covariance=covariances[detection_index, point_index]
                )
                if ellipse is None:
                    continue
                axis_lengths, angle = ellipse
                self._draw_ellipse(
                    scene=scene,
                    center=(round(x), round(y)),
                    axes=axis_lengths,
                    angle=angle,
                )

        return scene

    def _get_covariances(self, key_points: KeyPoints) -> npt.NDArray[np.float32]:
        """
        Fetch and validate the covariance array attached to ``key_points``.

        Args:
            key_points: Key points whose ``data`` mapping is looked up under
                ``self.covariance_data_key``.

        Returns:
            The covariances as a ``float32`` array of shape ``(N, K, 2, 2)``,
            where ``(N, K)`` are the leading dimensions of ``key_points.xy``.

        Raises:
            ValueError: If the key is missing or the array shape does not match.
        """
        covariances = key_points.data.get(self.covariance_data_key)
        if covariances is None:
            raise ValueError(
                f"key_points.data must contain {self.covariance_data_key!r} "
                "with shape (N, K, 2, 2)."
            )
        covariances = np.asarray(covariances, dtype=np.float32)
        expected_shape = (*key_points.xy.shape[:2], 2, 2)
        if covariances.shape != expected_shape:
            raise ValueError(
                f"Expected covariance shape {expected_shape}, got {covariances.shape}."
            )
        return covariances

    def _covariance_to_ellipse(
        self, covariance: npt.NDArray[np.float32]
    ) -> tuple[tuple[int, int], float] | None:
        """
        Convert one 2x2 covariance matrix into ellipse drawing parameters.

        Args:
            covariance: A single ``(2, 2)`` covariance matrix.

        Returns:
            ``((major, minor), angle)`` where the semi-axis lengths are integer
            pixels (at least 1) and ``angle`` is the major-axis direction in
            degrees, or ``None`` when the matrix is non-finite, cannot be
            decomposed, or has a non-positive eigenvalue.
        """
        if not np.isfinite(covariance).all():
            return None
        try:
            # Decompose in float64 for accuracy; eigh treats the input as
            # symmetric.
            eigenvalues, eigenvectors = np.linalg.eigh(covariance.astype(np.float64))
        except np.linalg.LinAlgError:
            return None
        if not np.isfinite(eigenvalues).all() or np.any(eigenvalues <= 0):
            return None

        # Largest eigenvalue first, so index 0 is the major axis.
        order = np.argsort(eigenvalues)[::-1]
        eigenvalues = eigenvalues[order]
        eigenvectors = eigenvectors[:, order]
        axes = self.sigma * np.sqrt(eigenvalues)
        if self.max_axis_length is not None:
            axes = np.minimum(axes, self.max_axis_length)
        # Clamp to one pixel so tiny covariances still produce a visible mark.
        axis_lengths = tuple(max(1, round(axis)) for axis in axes)
        angle = float(np.degrees(np.arctan2(eigenvectors[1, 0], eigenvectors[0, 0])))
        return axis_lengths, angle

    def _draw_ellipse(
        self,
        scene: npt.NDArray[np.uint8],
        center: tuple[int, int],
        axes: tuple[int, int],
        angle: float,
    ) -> None:
        """
        Draw one ellipse onto ``scene`` in place, solid or dashed.

        Args:
            scene: Image array that is drawn on directly.
            center: Ellipse center in integer pixel coordinates.
            axes: Integer semi-axis lengths ``(major, minor)``.
            angle: Rotation of the ellipse in degrees.
        """
        if self.line_style == "solid":
            cv2.ellipse(
                img=scene,
                center=center,
                axes=axes,
                angle=angle,
                startAngle=0,
                endAngle=360,
                color=self.color.as_bgr(),
                thickness=self.thickness,
                lineType=cv2.LINE_AA,
            )
            return

        # Dashed: alternate a drawn arc of ``dash_length`` degrees with a gap of
        # the same size; the last arc is clipped at 360 degrees.
        step = self.dash_length * 2
        for start_angle in range(0, 360, step):
            cv2.ellipse(
                img=scene,
                center=center,
                axes=axes,
                angle=angle,
                startAngle=start_angle,
                endAngle=min(start_angle + self.dash_length, 360),
                color=self.color.as_bgr(),
                thickness=self.thickness,
                lineType=cv2.LINE_AA,
            )
381+
195382
class VertexLabelAnnotator:
196383
"""
197384
A class that draws labels of skeleton vertices on images. It uses specified key

src/supervision/key_points/core.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,72 @@
2323
Index2D = tuple[Index1D, Index1D]
2424

2525

26+
def _rfdetr_source_shape(
    rfdetr_detections: Detections,
    detections_count: int,
) -> npt.NDArray[np.float32]:
    """
    Read and validate ``data["source_shape"]`` from RF-DETR detections.

    Args:
        rfdetr_detections: Detections whose ``data`` mapping is queried for the
            ``"source_shape"`` entry.
        detections_count: Number of detections ``N`` the entry must describe.

    Returns:
        The source shapes as a ``float32`` array of shape ``(N, 2)``.

    Raises:
        ValueError: If the entry is missing or its shape is not ``(N, 2)``.
    """
    raw_shape = rfdetr_detections.data.get("source_shape")
    if raw_shape is None:
        raise ValueError(
            "RF-DETR detections with keypoint precision data must contain "
            "data['source_shape'] with shape (N, 2)."
        )

    shapes = np.asarray(raw_shape, dtype=np.float32)
    wanted = (detections_count, 2)
    if shapes.shape == wanted:
        return shapes
    raise ValueError(
        "Expected RF-DETR source_shape shape "
        f"{wanted}, got {shapes.shape}."
    )
45+
46+
47+
def _rfdetr_precision_cholesky_to_pixel_covariance(
    precision_cholesky: npt.NDArray[np.float32],
    source_shape: npt.NDArray[np.float32],
) -> npt.NDArray[np.float32]:
    """
    Convert RF-DETR precision Cholesky parameters into pixel covariances.

    Each keypoint carries three parameters ``(log_l11, l21, log_l22)`` that
    define a lower-triangular factor ``L = [[l11, 0], [l21, l22]]`` of the
    precision matrix ``P = L @ L.T``. The covariance ``inv(P)`` is scaled by
    ``diag(width, height)`` on both sides, with ``(height, width)`` read from
    the matching row of ``source_shape``.

    The inverse is evaluated in closed form from ``inv(L)`` instead of forming
    ``P`` and inverting it numerically. With a large ``l21`` and a small
    ``l22`` the ``l22 ** 2`` term is absorbed when ``P`` is built, so ``P``
    becomes numerically singular even though the factor itself is valid.

    Args:
        precision_cholesky: Array of shape ``(N, K, 3)``. The log-diagonal
            entries are clipped to ``[-20, 20]`` and the off-diagonal entry to
            ``[-1e4, 1e4]`` before use.
        source_shape: Array with one ``(height, width)`` row per detection.

    Returns:
        A ``float32`` array of shape ``(N, K, 2, 2)``. A keypoint's matrix is
        filled with NaN when any of its parameters is non-finite or when the
        resulting covariance is not finite in ``float32``.

    Raises:
        ValueError: If ``precision_cholesky`` is not ``(N, K, 3)`` or the number
            of detections differs from ``source_shape``.
    """
    if precision_cholesky.ndim != 3 or precision_cholesky.shape[2] != 3:
        raise ValueError(
            "Expected RF-DETR keypoint precision shape (N, K, 3), "
            f"got {precision_cholesky.shape}."
        )
    if precision_cholesky.shape[0] != source_shape.shape[0]:
        raise ValueError(
            "RF-DETR keypoint precision and source_shape must contain the same "
            "number of detections, got "
            f"{precision_cholesky.shape[0]} and {source_shape.shape[0]}."
        )

    params = np.asarray(precision_cholesky, dtype=np.float64)
    # Keypoints with any NaN/inf parameter are reported as NaN, not clipped.
    finite_params = np.isfinite(params).all(axis=-1)

    shapes = np.asarray(source_shape, dtype=np.float64).reshape(-1, 2)
    height = shapes[:, 0][:, None]  # broadcast over the K keypoints
    width = shapes[:, 1][:, None]

    # Overflow and NaN propagation are expected for masked or extreme entries;
    # they are resolved by the NaN fill below, so silence the warnings.
    with np.errstate(over="ignore", invalid="ignore"):
        inv_l11 = np.exp(-np.clip(params[..., 0], -20.0, 20.0))
        l21 = np.clip(params[..., 1], -1.0e4, 1.0e4)
        inv_l22 = np.exp(-np.clip(params[..., 2], -20.0, 20.0))
        # inv(L) = [[inv_l11, 0], [lower, inv_l22]]
        lower = -l21 * inv_l11 * inv_l22

        # inv(P) = inv(L).T @ inv(L), scaled by diag(width, height) on each side.
        pixel_covariance = np.empty((*params.shape[:2], 2, 2), dtype=np.float64)
        pixel_covariance[..., 0, 0] = (inv_l11**2 + lower**2) * width * width
        cross = lower * inv_l22 * width * height
        pixel_covariance[..., 0, 1] = cross
        pixel_covariance[..., 1, 0] = cross
        pixel_covariance[..., 1, 1] = inv_l22**2 * height * height
        covariances = pixel_covariance.astype(np.float32)

    # Validate after the float32 cast so values that overflow float32 are
    # reported as NaN instead of being stored as inf.
    usable = finite_params & np.isfinite(covariances).all(axis=(-2, -1))
    covariances[~usable] = np.nan
    return covariances
90+
91+
2692
@dataclass
2793
class KeyPoints:
2894
"""
@@ -231,6 +297,58 @@ def __eq__(self, other: object) -> bool:
231297
]
232298
)
233299

300+
@classmethod
def from_rfdetr(cls, rfdetr_detections: Detections) -> KeyPoints:
    """
    Build a `sv.KeyPoints` object from RF-DETR `sv.Detections` output.

    Keypoints are read from ``detections.data["keypoints"]``, an array of
    shape ``(N, K, 3)`` whose last dimension holds ``[x, y, confidence]`` in
    pixel coordinates. If ``detections.data["keypoint_precision_cholesky"]``
    is also present, the per-keypoint precision parameters are converted to
    pixel-space covariance matrices and exposed as
    ``key_points.data["covariance"]``, ready for `sv.VertexEllipseAnnotator`.

    Args:
        rfdetr_detections: RF-DETR prediction returned by ``model.predict()``.

    Returns:
        A `sv.KeyPoints` object holding the RF-DETR keypoints and, when
        available, their covariance matrices.

    Raises:
        ValueError: If the RF-DETR detections do not contain valid keypoints,
            or if precision parameters are present without source shape data.
    """
    source = rfdetr_detections.data

    raw_keypoints = source.get("keypoints")
    if raw_keypoints is None:
        raise ValueError("RF-DETR detections must contain data['keypoints'].")

    keypoints = np.asarray(raw_keypoints, dtype=np.float32)
    has_expected_layout = keypoints.ndim == 3 and keypoints.shape[2] == 3
    if not has_expected_layout:
        raise ValueError(
            f"Expected RF-DETR keypoints shape (N, K, 3), got {keypoints.shape}."
        )

    # Covariances are optional: only produced when RF-DETR supplied precision
    # parameters, which in turn requires the per-detection source shape.
    extras: dict[str, npt.NDArray[np.generic] | list[Any]] = {}
    raw_precision = source.get("keypoint_precision_cholesky")
    if raw_precision is not None:
        shapes = _rfdetr_source_shape(
            rfdetr_detections, detections_count=keypoints.shape[0]
        )
        extras["covariance"] = _rfdetr_precision_cholesky_to_pixel_covariance(
            precision_cholesky=np.asarray(raw_precision, dtype=np.float32),
            source_shape=shapes,
        )

    coordinates = keypoints[:, :, :2].astype(np.float32)
    scores = keypoints[:, :, 2].astype(np.float32)
    return cls(
        xy=coordinates,
        confidence=scores,
        class_id=rfdetr_detections.class_id,
        data=extras,
    )
351+
234352
@classmethod
235353
def from_inference(cls, inference_result: Any) -> KeyPoints:
236354
"""

0 commit comments

Comments
 (0)