Skip to content

Commit 7e0032f

Browse files
authored
Merge branch 'master' into master
2 parents b0e0c64 + 8209fd3 commit 7e0032f

File tree

2 files changed

+207
-26
lines changed

2 files changed

+207
-26
lines changed

tools/Sam3/sam3_semantic_segmentation.py

Lines changed: 158 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,22 @@ def parse_arguments() -> argparse.Namespace:
8080
default="copy", # original quality by default
8181
help="Video bitrate: 'copy' (original), '2000k', '4000k', '8000k'",
8282
)
83+
parser.add_argument(
84+
"--coco_video_mode",
85+
type=str,
86+
default="no_coco",
87+
choices=["video", "frames", "no_coco"],
88+
help="For video input with COCO output: 'video' annotates the video "
89+
"as a single source, 'frames' extracts each processed frame as an "
90+
"individual image and annotates per frame, 'no_coco' disables "
91+
"COCO output",
92+
)
8393
return parser.parse_args()
8494

8595

8696
# -------- Functions --------
8797

98+
8899
def convert_avi_to_mp4(directory_path, quality):
89100
"""
90101
Convert AVI file to MP4.
@@ -189,8 +200,15 @@ def create_coco_output(
189200

190201
polygons = result.masks.xyn if is_normalized else result.masks.xy
191202
boxes = result.boxes.xyxyn if is_normalized else result.boxes.xyxy
203+
track_ids = (
204+
result.boxes.id.int().tolist()
205+
if result.boxes.id is not None
206+
else [None] * len(result.boxes.cls)
207+
)
192208

193-
for polygon, bbox, class_id in zip(polygons, boxes, result.boxes.cls):
209+
for polygon, bbox, class_id, track_id in zip(
210+
polygons, boxes, result.boxes.cls, track_ids
211+
):
194212
# Flatten polygon coordinates
195213
polygon_flat = polygon.flatten().tolist()
196214

@@ -206,6 +224,7 @@ def create_coco_output(
206224
"id": annotation_id,
207225
"image_id": image_id,
208226
"category_id": int(class_id) + 1,
227+
"track_id": track_id,
209228
"segmentation": [polygon_flat],
210229
"area": area,
211230
"bbox": [x1, y1, bbox_w, bbox_h],
@@ -293,6 +312,117 @@ def create_yolo_output(
293312
print(f"✓ Created {len(results)} images and labels in {output_dir}")
294313

295314

315+
def create_coco_video_frames_output(
    results: List[Any],
    text_prompts: List[str],
    metadata: Dict[str, Any],
    is_normalized: bool,
    video_path: str,
    stride: int,
    outdir: Path,
) -> Dict[str, Any]:
    """Build a COCO dict with one image entry per extracted video frame.

    The video is re-read with OpenCV; every frame selected by ``stride`` is
    written as a JPEG into ``outdir / "frames"`` and paired, in order, with
    the next entry of ``results``.

    Args:
        results: Per-frame prediction results, in processing order. Each one
            is read through ``orig_shape``, ``masks`` and ``boxes``.
        text_prompts: Prompts passed to ``create_coco_categories``.
        metadata: Stored as-is under the ``"info"`` key.
        is_normalized: Use normalized (``xyn`` / ``xyxyn``) coordinates
            instead of pixel (``xy`` / ``xyxy``) coordinates.
        video_path: Path of the video the results were computed from.
        stride: Frame sampling step used to select frames.
        outdir: Output directory; frames go into its ``frames`` subfolder.

    Returns:
        A dict with the keys ``info``, ``images``, ``annotations`` and
        ``categories``.

    Raises:
        RuntimeError: If the video cannot be opened.
    """
    frames_dir = outdir / "frames"
    frames_dir.mkdir(parents=True, exist_ok=True)

    coco_output = {
        "info": metadata,
        "images": [],
        "annotations": [],
        "categories": create_coco_categories(text_prompts),
    }

    video_name = Path(video_path).stem
    annotation_id = 1
    # With stride > 1 the counter starts at 1, so the frames kept are those
    # whose 1-based read position is a multiple of ``stride``.
    # NOTE(review): presumably mirrors how the predictor samples frames with
    # vid_stride — confirm against the predictor's loader.
    frame_idx = 1 if stride > 1 else 0
    saved_idx = 0

    cap = cv2.VideoCapture(video_path)
    # try/finally guarantees the capture handle is released even when
    # opening fails or an exception is raised while processing frames.
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Failed to open video: {video_path}")

        print(
            f"Extracting frames and building per-frame COCO annotations "
            f"(stride={stride})..."
        )

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % stride == 0:
                if saved_idx >= len(results):
                    print(f"Warning: No result available for frame {frame_idx}")
                    break

                frame_name = f"{video_name}_frame_{frame_idx:06d}.jpg"
                frame_path = frames_dir / frame_name
                # cv2.imwrite signals failure through its return value
                # rather than an exception; surface it so a missing image
                # referenced by the COCO file does not go unnoticed.
                if not cv2.imwrite(str(frame_path), frame):
                    print(f"Warning: Failed to write frame image: {frame_path}")

                result = results[saved_idx]
                image_id = saved_idx + 1
                height, width = result.orig_shape

                coco_output["images"].append(
                    {
                        "id": image_id,
                        "file_name": frame_name,
                        "width": width,
                        "height": height,
                        "frame_index": frame_idx,
                    }
                )

                frame_annotations = _build_coco_frame_annotations(
                    result, image_id, annotation_id, is_normalized
                )
                coco_output["annotations"].extend(frame_annotations)
                annotation_id += len(frame_annotations)

                saved_idx += 1

                if saved_idx % 10 == 0:
                    print(f"  Extracted {saved_idx} frames...")

            frame_idx += 1
    finally:
        cap.release()

    print(f"✓ Extracted {saved_idx} frames to {frames_dir}")
    return coco_output


def _build_coco_frame_annotations(
    result: Any,
    image_id: int,
    first_annotation_id: int,
    is_normalized: bool,
) -> List[Dict[str, Any]]:
    """Return the COCO annotation dicts for one frame's result.

    Annotation ids are assigned consecutively starting at
    ``first_annotation_id``. An empty list is returned when the result has
    no masks.
    """
    if result.masks is None:
        return []

    polygons = result.masks.xyn if is_normalized else result.masks.xy
    boxes = result.boxes.xyxyn if is_normalized else result.boxes.xyxy
    # Without tracking there are no ids; emit None for every detection.
    track_ids = (
        result.boxes.id.int().tolist()
        if result.boxes.id is not None
        else [None] * len(result.boxes.cls)
    )

    annotations = []
    detections = zip(polygons, boxes, result.boxes.cls, track_ids)
    for offset, (polygon, bbox, class_id, track_id) in enumerate(detections):
        x1, y1, x2, y2 = bbox[:4].tolist()
        annotations.append(
            {
                "id": first_annotation_id + offset,
                "image_id": image_id,
                # Category ids are 1-based; class ids are shifted by one.
                "category_id": int(class_id) + 1,
                "track_id": track_id,
                "segmentation": [polygon.flatten().tolist()],
                "area": float(cv2.contourArea(polygon.astype(np.float32))),
                # COCO boxes are [x, y, width, height].
                "bbox": [x1, y1, x2 - x1, y2 - y1],
                "iscrowd": 0,
            }
        )
    return annotations
424+
425+
296426
def create_yolo_video_output(
297427
annotation_type: str,
298428
results: List[Any],
@@ -475,7 +605,7 @@ def patched_postprocess(preds, img, orig_imgs):
475605
# print(f"\n Running prediction on {source_path}...")
476606
results = predictor(source=source_path, text=text_prompts, stream=False)
477607
if is_video(file_paths[0]):
478-
convert_avi_to_mp4(outputs_annotated)
608+
convert_avi_to_mp4(outputs_annotated, args.quality)
479609

480610
if not results:
481611
raise RuntimeError("SAM3 returned no results")
@@ -492,9 +622,22 @@ def patched_postprocess(preds, img, orig_imgs):
492622

493623
if "coco" in output_formats:
494624
print("\n→ Converting to COCO format...")
495-
coco_output = create_coco_output(
496-
results, text_prompts, metadata, is_normalized
497-
)
625+
626+
if is_video(file_paths[0]) and args.coco_video_mode == "frames":
627+
print(" Mode: per-frame (extracting individual frames)...")
628+
coco_output = create_coco_video_frames_output(
629+
results,
630+
text_prompts,
631+
metadata,
632+
is_normalized,
633+
file_paths[0],
634+
args.vid_stride,
635+
outdir,
636+
)
637+
else:
638+
coco_output = create_coco_output(
639+
results, text_prompts, metadata, is_normalized
640+
)
498641

499642
annotation_file = outdir / "annotations.json"
500643
with open(annotation_file, "w") as f:
@@ -510,7 +653,11 @@ def patched_postprocess(preds, img, orig_imgs):
510653

511654
if is_video(file_paths[0]):
512655
create_yolo_video_output(
513-
"bbox", results, yolo_bbox_dir, file_paths[0], args.vid_stride,
656+
"bbox",
657+
results,
658+
yolo_bbox_dir,
659+
file_paths[0],
660+
args.vid_stride,
514661
is_normalized,
515662
)
516663
else:
@@ -524,7 +671,11 @@ def patched_postprocess(preds, img, orig_imgs):
524671

525672
if is_video(file_paths[0]):
526673
create_yolo_video_output(
527-
"seg", results, yolo_seg_dir, file_paths[0], args.vid_stride,
674+
"seg",
675+
results,
676+
yolo_seg_dir,
677+
file_paths[0],
678+
args.vid_stride,
528679
is_normalized,
529680
)
530681
else:

tools/Sam3/sam3_semantic_segmentation.xml

Lines changed: 49 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
<tool id="sam3_semantic_segmentation" name="SAM3 Semantic Segmentation" version="1.0.1+galaxy2" profile="25.1">
1+
<tool id="sam3_semantic_segmentation" name="SAM3 Semantic Segmentation" version="1.0.1+galaxy3" profile="25.1">
22
<description>
33
SAM3 performs text-prompted semantic segmentation on images or videos.
44
</description>
55
<requirements>
6-
<container type="docker">quay.io/arthur_barreau/sam3_tool:1.0.0</container>
6+
<container type="docker">quay.io/arthur_barreau/sam3_tool:1.0.1</container>
77
</requirements>
88
<required_files>
99
<include path="sam3_semantic_segmentation.py" />
@@ -26,9 +26,21 @@
2626
--conf '$conf'
2727
--vid_stride '$vid_stride'
2828
--outdir outputs
29-
--outputs $outputs_format
3029
--name_file '$name_file'
31-
--quality '$input.quality'
30+
#if $input.input_kind == "video"
31+
--quality '$input.quality'
32+
#end if
33+
#if $input.input_kind == "image"
34+
--outputs $input.outputs_format
35+
#else
36+
#if $input.coco_video_mode != "no_coco"
37+
--outputs 'coco, $input.outputs_format'
38+
--coco_video_mode '$input.coco_video_mode'
39+
#else
40+
--outputs $input.outputs_format
41+
#end if
42+
#end if
43+
--do_normalization '$do_normalization'
3244
]]></command>
3345
<inputs>
3446
<param name="sam3_models" label="Model data" type="select" help="Contact the administrator of our Galaxy instance if you miss model data">
@@ -48,10 +60,17 @@
4860
<param name="source" type="data" format="jpg,png,tiff" multiple="true" label="Input images">
4961
<validator type="expression" message="TIFF images must contain exactly 3 channels (RGB).">value.ext not in ('tiff') or value.metadata.channels == 3</validator>
5062
</param>
63+
<param name="outputs_format" type="select" multiple="true" optional="true"
64+
label="Output formats"
65+
help="Select one or more annotation formats to generate.">
66+
<option value="coco">COCO</option>
67+
<option value="yolo_bbox">YOLO bounding boxes</option>
68+
<option value="yolo_seg">YOLO segmentation masks</option>
69+
</param>
5170
</when>
5271
<when value="video">
5372
<param name="source" type="data" format="mp4,avi,mov,gif"
54-
multiple="false" label="Input video"/>
73+
multiple="false" label="Input video file"/>
5574
<param name="quality" type="select" label="Video quality"
5675
help="Select the output video bitrate. This does not affect processing speed or annotations.
5776
Higher quality than the original is not useful and will only increase file size.">
@@ -61,6 +80,19 @@
6180
<option value="4000k">4000k - Good (1080p)</option>
6281
<option value="8000k">8000k - High quality (1080p)</option>
6382
</param>
83+
<param name="coco_video_mode" type="select"
84+
label="COCO output mode"
85+
help="Controls whether COCO annotations are generated, and how frames are referenced.">
86+
<option value="video" selected="true">Annotate the video — one COCO entry per frame, referencing the video file</option>
87+
<option value="frames">Annotate extracted frames — saves JPGs and one COCO entry per frame image</option>
88+
<option value="no_coco">No COCO output</option>
89+
</param>
90+
<param name="outputs_format" type="select" multiple="true" optional="true"
91+
label="Additional output formats"
92+
help="YOLO formats are optional. COCO output is controlled separately above.">
93+
<option value="yolo_bbox">YOLO bounding boxes</option>
94+
<option value="yolo_seg">YOLO segmentation masks</option>
95+
</param>
6496
</when>
6597
</conditional>
6698
<param name="text_prompt" type="text" label="Text prompt" >
@@ -77,12 +109,6 @@
77109
<param name="vid_stride" type="integer" value="5" min="1" max="300"
78110
label="Video frame stride"
79111
help="For video input: process one frame every N frames."/>
80-
<param name="outputs_format" type="select" multiple="true" optional="true"
81-
label="Output formats">
82-
<option value="coco">COCO</option>
83-
<option value="yolo_bbox">YOLO bounding boxes</option>
84-
<option value="yolo_seg">YOLO segmentation</option>
85-
</param>
86112
<param name="do_normalization" type="boolean" checked="false" label="Normalize outputs?" >
87113
<help><![CDATA[
88114
This option will be applied to all selected formats above.<br/>
@@ -93,26 +119,30 @@ This option will be applied to all selected formats above.<br/>
93119
</param>
94120
</inputs>
95121
<outputs>
96-
<data name="Annotations_coco" format="json" from_work_dir="./outputs/annotations.json" label="Annotation COCO" >
97-
<filter>outputs_format and "coco" in outputs_format</filter>
122+
<data name="Annotations_coco" format="json" from_work_dir="./outputs/annotations.json" label="Annotation COCO">
123+
<filter>input['coco_video_mode'] != "no_coco"</filter>
98124
</data>
99-
<collection name="Outputs_annotated" type="list">
125+
<collection name="Coco_Frames" type="list" label="COCO Extracted Frames">
126+
<filter>input['coco_video_mode'] == "frames"</filter>
127+
<discover_datasets pattern="__name_and_ext__" directory="outputs/frames"/>
128+
</collection>
129+
<collection name="Outputs_annotated" type="list" label="Annotated Outputs">
100130
<discover_datasets pattern="__name_and_ext__" directory="outputs/outputs_annotated"/>
101131
</collection>
102132
<collection name="Yolo_Bbox_Image" type="list" label="YOLO Bbox Images">
103-
<filter>outputs_format and 'yolo_bbox' in outputs_format</filter>
133+
<filter>input['outputs_format'] and 'yolo_bbox' in input['outputs_format']</filter>
104134
<discover_datasets pattern="__name_and_ext__" directory="outputs/yolo_bbox/images"/>
105135
</collection>
106136
<collection name="Yolo_Bbox_Label" type="list" label="YOLO Bbox Labels">
107-
<filter>outputs_format and 'yolo_bbox' in outputs_format</filter>
137+
<filter>input['outputs_format'] and 'yolo_bbox' in input['outputs_format']</filter>
108138
<discover_datasets pattern="__name_and_ext__" directory="outputs/yolo_bbox/labels"/>
109139
</collection>
110140
<collection name="Yolo_Seg_Image" type="list" label="YOLO Seg Images">
111-
<filter>outputs_format and 'yolo_seg' in outputs_format</filter>
141+
<filter>input['outputs_format'] and 'yolo_seg' in input['outputs_format']</filter>
112142
<discover_datasets pattern="__name_and_ext__" directory="outputs/yolo_seg/images"/>
113143
</collection>
114144
<collection name="Yolo_Seg_Label" type="list" label="YOLO Seg Labels">
115-
<filter>outputs_format and 'yolo_seg' in outputs_format</filter>
145+
<filter>input['outputs_format'] and 'yolo_seg' in input['outputs_format']</filter>
116146
<discover_datasets pattern="__name_and_ext__" directory="outputs/yolo_seg/labels"/>
117147
</collection>
118148
</outputs>
@@ -122,10 +152,10 @@ This option will be applied to all selected formats above.<br/>
122152
<conditional name="input">
123153
<param name="input_kind" value="image" />
124154
<param name="source" value="5827603936_3f1d5d715c_z.jpg,shrimp.png"/>
155+
<param name="outputs_format" value="coco,yolo_bbox"/>
125156
</conditional>
126157
<param name="text_prompt" value="elephant"/>
127158
<param name="conf" value="0.25"/>
128-
<param name="outputs_format" value="coco,yolo_bbox"/>
129159
<assert_stdout>
130160
<has_text text="Invalid model!"/>
131161
</assert_stdout>

0 commit comments

Comments
 (0)