Skip to content

Commit 0a5a676

Browse files
authored
Merge pull request #678 from DagsHub/mot_cvat_video_converter
Add MOT and CVAT video annotation import/export support
2 parents 6bef139 + f46169c commit 0a5a676

10 files changed

Lines changed: 1404 additions & 25 deletions

File tree

dagshub/auth/token_auth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]:
3737

3838
def can_renegotiate(self):
3939
# Env var tokens cannot renegotiate, every other token type can
40-
return not type(self._token) is EnvVarDagshubToken
40+
return type(self._token) is not EnvVarDagshubToken
4141

4242
def renegotiate_token(self):
4343
if not self._token_storage.is_valid_token(self._token, self._host):

dagshub/data_engine/annotation/importer.py

Lines changed: 227 additions & 12 deletions
Large diffs are not rendered by default.

dagshub/data_engine/annotation/metadata.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask, parse_ls_task
44
from dagshub_annotation_converter.formats.yolo import YoloContext, import_lookup, import_yolo_result
55
from dagshub_annotation_converter.formats.yolo.categories import Categories
6+
from dagshub_annotation_converter.ir.base import IRAnnotationBase, IRTaskAnnotation
67
from dagshub_annotation_converter.ir.image import (
78
CoordinateStyle,
89
IRBBoxImageAnnotation,
@@ -11,7 +12,8 @@
1112
IRSegmentationImageAnnotation,
1213
IRSegmentationPoint,
1314
)
14-
from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase, IRImageAnnotationBase
15+
from dagshub_annotation_converter.ir.image.annotations.base import IRImageAnnotationBase
16+
from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack
1517

1618
from dagshub.common.api import UserAPI
1719
from dagshub.common.helpers import log_message
@@ -22,6 +24,8 @@
2224

2325
from dagshub.data_engine.model.datapoint import Datapoint
2426

27+
from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation
28+
2529

2630
class AnnotationMetaDict(dict):
2731
def __init__(self, annotation: "MetadataAnnotations", *args, **kwargs):
@@ -63,13 +67,13 @@ def __init__(
6367
self,
6468
datapoint: "Datapoint",
6569
field: str,
66-
annotations: Optional[Sequence["IRAnnotationBase"]] = None,
70+
annotations: Optional[Sequence["IRTaskAnnotation"]] = None,
6771
meta: Optional[Dict] = None,
6872
original_value: Optional[bytes] = None,
6973
):
7074
self.datapoint = datapoint
7175
self.field = field
72-
self.annotations: list["IRAnnotationBase"]
76+
self.annotations: list["IRTaskAnnotation"]
7377
if annotations is None:
7478
annotations = []
7579
self.annotations = list(annotations)
@@ -94,12 +98,34 @@ def to_ls_task(self) -> Optional[bytes]:
9498
task = LabelStudioTask(
9599
user_id=UserAPI.get_current_user(self.datapoint.datasource.source.repoApi.host).user_id,
96100
)
97-
task.data["image"] = self.datapoint.download_url
98-
# TODO: need to filter out non-image annotations here maybe?
99-
task.add_ir_annotations(self.annotations)
101+
if any(isinstance(ann, IRVideoAnnotationTrack) for ann in self.annotations):
102+
task.data["video"] = self.datapoint.download_url
103+
frames_count = self._get_video_frames_count()
104+
for ann in self.annotations:
105+
if isinstance(ann, IRVideoAnnotationTrack):
106+
ls_ann = VideoRectangleAnnotation.from_ir_track(ann, frames_count=frames_count)
107+
if ann.__pydantic_extra__ is not None:
108+
ls_ann.__pydantic_extra__ = ann.__pydantic_extra__.copy()
109+
task.add_annotation(ls_ann)
110+
else:
111+
task.add_ir_annotation(ann)
112+
else:
113+
task.data["image"] = self.datapoint.download_url
114+
task.add_ir_annotations(self.annotations)
100115
task.meta.update(self.meta)
101116
return task.model_dump_json().encode("utf-8")
102117

118+
def _get_video_frames_count(self) -> Optional[int]:
    """
    Derive a frame count from the video tracks held in ``self.annotations``.

    Only entries that are ``IRVideoAnnotationTrack`` instances are inspected;
    every other annotation is ignored.

    Returns:
        The highest ``frame_number`` found across all track annotations plus one,
        or ``None`` when no video track contributes any frame annotation.
    """
    # NOTE(review): the "+ 1" presumably treats frame numbers as zero-based - confirm.
    frame_numbers = [
        frame_ann.frame_number
        for candidate in self.annotations
        if isinstance(candidate, IRVideoAnnotationTrack)
        for frame_ann in candidate.annotations
    ]
    if not frame_numbers:
        return None
    return max(frame_numbers) + 1
128+
103129
@property
104130
def value(self) -> Optional[bytes]:
105131
"""
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from collections import defaultdict
2+
from typing import Optional, Sequence
3+
4+
from dagshub_annotation_converter.ir.video import (
5+
IRVideoFrameAnnotationBase,
6+
IRVideoAnnotationTrack,
7+
IRVideoSequence,
8+
)
9+
10+
11+
def build_video_sequence_from_annotations(
    annotations: Sequence[IRVideoFrameAnnotationBase],
    filename: Optional[str] = None,
) -> IRVideoSequence:
    """
    Assemble an ``IRVideoSequence`` out of a flat list of per-frame annotations.

    Annotations are bucketed by their ``imported_id``; each bucket becomes one
    ``IRVideoAnnotationTrack`` whose ``object_id`` is the stringified bucket key.
    Buckets keep the order in which their key was first seen.

    Args:
        annotations: Frame annotations to group into tracks.
        filename: Optional video file name. It is passed to the sequence and is
            also written onto every track annotation whose ``filename`` is ``None``.

    Returns:
        The sequence built from the grouped tracks.

    Raises:
        ValueError: If any annotation has ``imported_id`` set to ``None``.
    """
    # Tracks have to be formed before the sequence is built, so group first.
    grouped: dict[str, list[IRVideoFrameAnnotationBase]] = {}
    for frame_ann in annotations:
        track_key = frame_ann.imported_id
        if track_key is None:
            raise ValueError("Video annotation is missing an object identifier")
        grouped.setdefault(track_key, []).append(frame_ann)

    tracks = []
    for track_key, members in grouped.items():
        tracks.append(IRVideoAnnotationTrack.from_annotations(members, object_id=str(track_key)))

    sequence = IRVideoSequence.from_annotations(tracks=tracks, filename=filename)

    if filename is not None:
        # Backfill the file name only where an annotation does not carry one already.
        unnamed = (
            track_ann
            for track in sequence.tracks
            for track_ann in track.annotations
            if track_ann.filename is None
        )
        for track_ann in unnamed:
            track_ann.filename = filename

    # Return values are intentionally discarded: per the upstream note these
    # resolved_* methods cache what they compute, so calling them warms the cache.
    sequence.resolved_video_width()
    sequence.resolved_video_height()
    sequence.resolved_sequence_length()

    return sequence

dagshub/data_engine/model/datasource.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,6 +1668,16 @@ def import_annotations_from_files(
16681668
16691669
Keyword Args:
16701670
yolo_type: Type of YOLO annotations to import. Either ``bbox``, ``segmentation`` or ``pose``.
1671+
image_width: (MOT, CVAT video) Width of the video frames in pixels. \
1672+
Used when the annotation file does not contain dimension metadata.
1673+
image_height: (MOT, CVAT video) Height of the video frames in pixels. \
1674+
Used when the annotation file does not contain dimension metadata.
1675+
video_name: (MOT) Name/path of the video file these annotations belong to. \
1676+
Used to key the resulting annotations when it cannot be inferred from the annotation file.
1677+
video_dir_name: (MOT filesystem layout) Name of the subdirectory containing video files. \
1678+
Defaults to ``"videos"``.
1679+
label_dir_name: (MOT filesystem layout) Name of the subdirectory containing label files. \
1680+
Defaults to ``"labels"``.
16711681
16721682
Example to import segmentation annotations into an ``imported_annotations`` field,
16731683
using YOLO information from an ``annotations.yaml`` file (can be local, or in the repo)::

0 commit comments

Comments
 (0)