feat(model): mask unused_predictions + link-first mask merge (#474 Gap 1) (#478)

talmo · claude · web-flow · commit 038500de5d34 · 2026-06-06T12:12:04.000-07:00
Add the segmentation-mask analogue of LabeledFrame.unused_predictions and make
the auto-merge cascade honor the from_predicted provenance link for masks,
mirroring the pose human-in-the-loop flow.

- LabeledFrame.unused_predicted_masks: reports PredictedSegmentationMask objects
  with no adopting user mask. A prediction counts as adopted when a user mask in
  the frame links to it via from_predicted (checked first) or, lacking a link,
  spatially overlaps it (bbox-centroid within the 5px auto-merge default).
- Auto-merge link-first matching: _resolve_annotation_auto now seeds its match
  set with from_predicted links (score inf, bypassing the distance threshold)
  before spatial centroid matching, so an adopted correction replaces its exact
  source prediction regardless of distance. Implemented generically via
  _find_annotation_link_matches; only masks carry the link today, so other
  modalities fall through to unchanged spatial behavior.
- Docs: document the link-first precedence in merging.md and unused_predicted_masks
  in segmentation.md.

Scope per design discussion: masks only, minimal/additive (no new public
matcher abstraction). A follow-up issue tracks generalizing this to an
AnnotationMatcher and adding from_predicted to the other dense modalities.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/docs/merging.md b/docs/merging.md
@@ -613,6 +613,16 @@ same threshold as instance matching (default 5 pixels). Each modality is resolve
 independently — centroids by `(x, y)`, bounding boxes and ROIs by their centroid, and
 masks by the centroid of their bounding box.
 
+For segmentation masks, an explicit provenance link takes precedence over spatial
+matching. If a `UserSegmentationMask` records (via `from_predicted`, set by
+[`PredictedSegmentationMask.to_user()`](model/segmentation.md#adopting-predictions-human-in-the-loop))
+that it was adopted from a `PredictedSegmentationMask` present in the merge, the two are
+paired directly — the user correction replaces its exact source prediction regardless of
+centroid distance — and spatial matching only resolves the remaining, unlinked
+annotations. Other modalities do not yet carry a `from_predicted` link and are matched
+spatially only. To list predicted masks that have not been adopted (by link or spatial
+overlap), use `LabeledFrame.unused_predicted_masks`.
+
 New frames (no matching frame in the target) always copy all annotations from the
 source, regardless of strategy.
 
diff --git a/docs/model/segmentation.md b/docs/model/segmentation.md
@@ -155,6 +155,31 @@ resolution can happen later at merge time (see [Merging](../merging.md)):
 
 ```
 
+To find predicted masks that have **not** yet been corrected — the segmentation
+analogue of `LabeledFrame.unused_predictions` for poses — use
+`LabeledFrame.unused_predicted_masks`. A `PredictedSegmentationMask` is treated
+as adopted (and excluded) when a `UserSegmentationMask` in the same frame links
+to it via `from_predicted` (checked first), or, lacking a link, sits within 5 px
+of it (measured between bounding-box centroids, not by pixel overlap). This
+drives the "retrain only what a human corrected" workflow:
+
+```pycon
+>>> import numpy as np
+>>> import sleap_io as sio
+>>> video = sio.Video(filename="example.mp4", open_backend=False)
+>>> mask_data = np.zeros((100, 100), dtype=bool)
+>>> mask_data[20:40, 30:60] = True
+>>> pred_a = sio.PredictedSegmentationMask.from_numpy(mask_data, score=0.87)
+>>> pred_b = sio.PredictedSegmentationMask.from_numpy(mask_data, score=0.62)
+>>> pred_b.offset = (500.0, 500.0)  # a separate prediction elsewhere in the frame
+>>> frame = sio.LabeledFrame(video=video, frame_idx=0, masks=[pred_a, pred_b])
+>>> frame.masks.append(pred_a.to_user())  # adopt pred_a, leave pred_b
+>>> unused = frame.unused_predicted_masks  # only the uncorrected prediction
+>>> len(unused), unused[0] is pred_b
+(1, True)
+
+```
+
 ### Multi-resolution masks
 
 Segmentation masks stored at lower resolution — e.g., from a model that
diff --git a/sleap_io/model/labeled_frame.py b/sleap_io/model/labeled_frame.py
@@ -95,6 +95,47 @@ def _find_annotation_matches(
     return matches
 
 
+def _find_annotation_link_matches(
+    self_list: list,
+    other_list: list,
+) -> list[tuple[int, int, float]]:
+    """Find user<->predicted matches via the ``from_predicted`` provenance link.
+
+    A match is recorded whenever a user annotation in one list explicitly records
+    (via ``from_predicted``) that it was adopted from a predicted annotation in
+    the other list. These take precedence over spatial matching (score is
+    ``inf``), so an adopted correction resolves against its exact source
+    prediction regardless of centroid distance. Only modalities that carry
+    ``from_predicted`` (segmentation masks) can produce link matches; for every
+    other modality this returns an empty list and merge behavior is unchanged.
+
+    Args:
+        self_list: Annotations from the self frame.
+        other_list: Annotations from the other frame.
+
+    Returns:
+        List of ``(self_idx, other_idx, inf)`` tuples for linked pairs.
+    """
+    matches = []
+    other_id_to_idx = {id(b): j for j, b in enumerate(other_list)}
+    self_id_to_idx = {id(a): i for i, a in enumerate(self_list)}
+    # User annotation in self linked to a predicted annotation in other.
+    for i, a in enumerate(self_list):
+        src = getattr(a, "from_predicted", None)
+        if src is not None:
+            j = other_id_to_idx.get(id(src))
+            if j is not None:
+                matches.append((i, j, float("inf")))
+    # User annotation in other linked to a predicted annotation in self.
+    for j, b in enumerate(other_list):
+        src = getattr(b, "from_predicted", None)
+        if src is not None:
+            i = self_id_to_idx.get(id(src))
+            if i is not None:
+                matches.append((i, j, float("inf")))
+    return matches
+
+
 def _resolve_annotation_auto(
     self_list: list,
     other_list: list,
@@ -124,8 +165,13 @@ def _resolve_annotation_auto(
         if not ann.is_predicted:
             merged.append(ann)
 
-    # 2. Find spatial matches
-    matches = _find_annotation_matches(self_list, other_list, attr, threshold)
+    # 2. Find matches: explicit ``from_predicted`` links first (score ``inf``, so
+    # the greedy pass below prefers them over spatial matches and ignores the
+    # distance threshold), then spatial centroid matches as a fallback. Only
+    # masks carry links today, so other modalities fall straight through to
+    # spatial matching with unchanged behavior.
+    matches = _find_annotation_link_matches(self_list, other_list)
+    matches += _find_annotation_matches(self_list, other_list, attr, threshold)
 
     # 3. Greedy one-to-one matching: sort by score descending, assign each
     # self/other index at most once so no annotation is silently dropped.
@@ -422,6 +468,47 @@ def unused_predictions(self) -> list[Instance]:
 
         return unused_predictions
 
+    @property
+    def unused_predicted_masks(self) -> list["SegmentationMask"]:
+        """Return predicted masks in this frame not yet adopted by a user mask.
+
+        A `PredictedSegmentationMask` is considered *adopted* (and so excluded
+        from the result) when some `UserSegmentationMask` in the same frame
+        either links to it via `from_predicted` (checked first) or, lacking an
+        explicit link, spatially overlaps it (bbox-centroid distance within 5 px,
+        the auto-merge default). This mirrors the link-first, spatial-fallback
+        precedence used by the auto-merge cascade and supports the
+        "retrain only what a human corrected" workflow.
+
+        This is the segmentation-mask analogue of `unused_predictions` (which
+        covers `PredictedInstance` objects).
+
+        Returns:
+            The `PredictedSegmentationMask` objects with no adopting user mask.
+        """
+        from sleap_io.model.mask import PredictedSegmentationMask
+
+        predicted = [m for m in self.masks if isinstance(m, PredictedSegmentationMask)]
+        if not predicted:
+            return []
+        user_masks = [m for m in self.masks if not m.is_predicted]
+
+        adopted: set[int] = set()
+        # Link-first: predicted masks explicitly adopted via from_predicted.
+        for u in user_masks:
+            src = getattr(u, "from_predicted", None)
+            if src is not None:
+                adopted.add(id(src))
+        # Spatial fallback: a user mask overlaps a still-unadopted prediction.
+        remaining = [m for m in predicted if id(m) not in adopted]
+        if remaining and user_masks:
+            for self_idx, _other_idx, _score in _find_annotation_matches(
+                remaining, user_masks, "masks", 5.0
+            ):
+                adopted.add(id(remaining[self_idx]))
+
+        return [m for m in predicted if id(m) not in adopted]
+
     def remove_predictions(self):
         """Remove all predicted instances and annotations from the frame."""
         from sleap_io.model.bbox import PredictedBoundingBox
diff --git a/tests/model/test_labeled_frame.py b/tests/model/test_labeled_frame.py
@@ -5,6 +5,7 @@
 
 from sleap_io import Instance, PredictedInstance, Skeleton, Track, Video
 from sleap_io.model.labeled_frame import LabeledFrame
+from sleap_io.model.mask import PredictedSegmentationMask, UserSegmentationMask
 
 
 def test_labeled_frame():
@@ -187,6 +188,74 @@ def test_labeled_frame_unused_predictions():
     assert (lf2.unused_predictions[0].numpy() == 1).all()
 
 
+def test_unused_predicted_masks_none_when_no_predictions():
+    """A frame with no predicted masks reports no unused predictions."""
+    video = Video("test.mp4")
+    user = UserSegmentationMask.from_numpy(np.ones((5, 5), dtype=bool))
+    lf = LabeledFrame(video=video, frame_idx=0, masks=[user])
+    assert lf.unused_predicted_masks == []
+
+
+def test_unused_predicted_masks_unadopted_reported():
+    """A predicted mask with no adopting user mask is reported as unused."""
+    video = Video("test.mp4")
+    pred = PredictedSegmentationMask.from_numpy(np.ones((5, 5), dtype=bool), score=0.9)
+    lf = LabeledFrame(video=video, frame_idx=0, masks=[pred])
+    assert lf.unused_predicted_masks == [pred]
+
+
+def test_unused_predicted_masks_excludes_linked():
+    """A predicted mask adopted via from_predicted is not reported (link-first)."""
+    video = Video("test.mp4")
+    pred = PredictedSegmentationMask.from_numpy(np.ones((5, 5), dtype=bool), score=0.9)
+    user = pred.to_user()  # sets user.from_predicted = pred
+    lf = LabeledFrame(video=video, frame_idx=0, masks=[pred, user])
+    assert lf.unused_predicted_masks == []
+
+
+def test_unused_predicted_masks_link_overrides_distance():
+    """An explicit link counts as adopted even when the masks are far apart."""
+    video = Video("test.mp4")
+    pred = PredictedSegmentationMask.from_numpy(
+        np.ones((5, 5), dtype=bool), score=0.9, offset=(0.0, 0.0)
+    )
+    user = pred.to_user()
+    # Move the user mask far away; the from_predicted link should still count.
+    user.offset = (500.0, 500.0)
+    lf = LabeledFrame(video=video, frame_idx=0, masks=[pred, user])
+    assert lf.unused_predicted_masks == []
+
+
+def test_unused_predicted_masks_spatial_fallback():
+    """An unlinked user mask overlapping a prediction adopts it spatially."""
+    video = Video("test.mp4")
+    pred = PredictedSegmentationMask.from_numpy(
+        np.ones((10, 10), dtype=bool), score=0.9, offset=(5.0, 5.0)
+    )
+    # Unlinked user mask with an overlapping bbox centroid (within 5 px).
+    user = UserSegmentationMask.from_numpy(
+        np.ones((10, 10), dtype=bool), offset=(6.0, 6.0)
+    )
+    assert user.from_predicted is None
+    lf = LabeledFrame(video=video, frame_idx=0, masks=[pred, user])
+    assert lf.unused_predicted_masks == []
+
+
+def test_unused_predicted_masks_mixed():
+    """Only the prediction without an adopting user mask is reported."""
+    video = Video("test.mp4")
+    adopted = PredictedSegmentationMask.from_numpy(
+        np.ones((5, 5), dtype=bool), score=0.9, offset=(0.0, 0.0)
+    )
+    user = adopted.to_user()
+    # A second prediction far from any user mask remains unused.
+    orphan = PredictedSegmentationMask.from_numpy(
+        np.ones((5, 5), dtype=bool), score=0.8, offset=(500.0, 500.0)
+    )
+    lf = LabeledFrame(video=video, frame_idx=0, masks=[adopted, user, orphan])
+    assert lf.unused_predicted_masks == [orphan]
+
+
 def test_labeled_frame_matches():
     """Test LabeledFrame.matches() method."""
     video1 = Video(filename="test1.mp4")
@@ -1469,6 +1538,135 @@ def test_merge_annotations_auto_masks():
     assert not lf1.masks[0].is_predicted
 
 
+def test_merge_annotations_auto_masks_link_overrides_distance():
+    """from_predicted link replaces the source prediction despite far distance."""
+    video = Video(filename="test.mp4", open_backend=False)
+    mask_data = np.ones((10, 10), dtype=bool)
+    # self holds the prediction; other holds a user correction adopted from it
+    # but moved far away (well beyond the 5 px spatial threshold).
+    self_pred = PredictedSegmentationMask.from_numpy(
+        mask_data, score=0.7, offset=(5.0, 5.0)
+    )
+    other_user = self_pred.to_user()
+    other_user.offset = (500.0, 500.0)
+
+    lf1 = LabeledFrame(video=video, frame_idx=0, masks=[self_pred])
+    lf2 = LabeledFrame(video=video, frame_idx=0, masks=[other_user])
+
+    lf1._merge_annotations(lf2, strategy="auto")
+
+    # Spatial matching alone would keep both (too far apart); the link resolves
+    # them as the same annotation and the user correction wins.
+    assert len(lf1.masks) == 1
+    assert not lf1.masks[0].is_predicted
+
+
+def test_merge_annotations_auto_masks_link_self_side():
+    """from_predicted link is honored when the user correction lives in self."""
+    video = Video(filename="test.mp4", open_backend=False)
+    mask_data = np.ones((10, 10), dtype=bool)
+    # other holds the source prediction; self holds the user correction adopted
+    # from it, moved far away (beyond the spatial threshold).
+    other_pred = PredictedSegmentationMask.from_numpy(
+        mask_data, score=0.7, offset=(5.0, 5.0)
+    )
+    self_user = other_pred.to_user()
+    self_user.offset = (500.0, 500.0)
+
+    lf1 = LabeledFrame(video=video, frame_idx=0, masks=[self_user])
+    lf2 = LabeledFrame(video=video, frame_idx=0, masks=[other_pred])
+
+    lf1._merge_annotations(lf2, strategy="auto")
+
+    # The user correction in self is kept and its linked source prediction from
+    # other is dropped, despite the large spatial distance.
+    assert len(lf1.masks) == 1
+    assert not lf1.masks[0].is_predicted
+
+
+def test_merge_annotations_auto_masks_link_beats_spatial_decoy():
+    """The link pairs with the true source, not a closer spatial decoy."""
+    video = Video(filename="test.mp4", open_backend=False)
+    mask_data = np.ones((6, 6), dtype=bool)
+    # True source the user adopted from, placed far from the user mask.
+    true_src = PredictedSegmentationMask.from_numpy(
+        mask_data, score=0.6, offset=(100.0, 100.0)
+    )
+    # A decoy prediction sitting right on top of the user mask.
+    decoy = PredictedSegmentationMask.from_numpy(
+        mask_data, score=0.9, offset=(6.0, 6.0)
+    )
+    user = true_src.to_user()
+    user.offset = (5.0, 5.0)  # spatially nearest to `decoy`
+
+    lf1 = LabeledFrame(video=video, frame_idx=0, masks=[true_src, decoy])
+    lf2 = LabeledFrame(video=video, frame_idx=0, masks=[user])
+
+    lf1._merge_annotations(lf2, strategy="auto")
+
+    # The user replaces its linked true source; the decoy stays as a prediction.
+    assert sum(not m.is_predicted for m in lf1.masks) == 1
+    remaining_pred = [m for m in lf1.masks if m.is_predicted]
+    assert remaining_pred == [decoy]
+
+
+def test_merge_annotations_auto_masks_link_multiple_pairs():
+    """Independent from_predicted links resolve in both directions in one merge."""
+    video = Video(filename="test.mp4", open_backend=False)
+    mask_data = np.ones((8, 8), dtype=bool)
+    # Two source predictions, each adopted by a user correction in the *other*
+    # frame, with every mask placed far apart so only the links can pair them.
+    self_pred = PredictedSegmentationMask.from_numpy(
+        mask_data, score=0.5, offset=(200.0, 200.0)
+    )
+    other_pred = PredictedSegmentationMask.from_numpy(
+        mask_data, score=0.6, offset=(600.0, 600.0)
+    )
+    self_user = other_pred.to_user()  # self user adopted from other's prediction
+    self_user.offset = (10.0, 10.0)
+    other_user = self_pred.to_user()  # other user adopted from self's prediction
+    other_user.offset = (400.0, 400.0)
+
+    lf1 = LabeledFrame(video=video, frame_idx=0, masks=[self_user, self_pred])
+    lf2 = LabeledFrame(video=video, frame_idx=0, masks=[other_user, other_pred])
+
+    lf1._merge_annotations(lf2, strategy="auto")
+
+    # Both predictions are superseded by their linked corrections; only the two
+    # user masks remain.
+    assert len(lf1.masks) == 2
+    assert all(not m.is_predicted for m in lf1.masks)
+
+
+def test_merge_annotations_auto_masks_link_source_absent():
+    """A from_predicted link to a prediction absent from the merge falls back.
+
+    When the linked source is not present in the opposing frame, no link match is
+    produced (the link cannot be honored) and matching falls back to spatial
+    behavior.
+    """
+    video = Video(filename="test.mp4", open_backend=False)
+    mask_data = np.ones((8, 8), dtype=bool)
+    external = PredictedSegmentationMask.from_numpy(mask_data, score=0.5)
+
+    # self's user links to `external` (not in other); other's user links to
+    # `external` too (not in self). Neither link can resolve to the opposing
+    # frame, and the two user masks are far apart.
+    self_user = external.to_user()
+    self_user.offset = (10.0, 10.0)
+    other_user = external.to_user()
+    other_user.offset = (900.0, 900.0)
+
+    lf1 = LabeledFrame(video=video, frame_idx=0, masks=[self_user])
+    lf2 = LabeledFrame(video=video, frame_idx=0, masks=[other_user])
+
+    lf1._merge_annotations(lf2, strategy="auto")
+
+    # Unresolvable links + far apart → both user masks are kept.
+    assert len(lf1.masks) == 2
+    assert all(not m.is_predicted for m in lf1.masks)
+
+
 def test_merge_annotations_update_tracks_cascades():
     """Update_tracks updates annotation tracks from spatially matched other."""
     from sleap_io.model.centroid import UserCentroid