-
Notifications
You must be signed in to change notification settings - Fork 162
Expand file tree
/
Copy pathdemo.py
More file actions
111 lines (93 loc) · 3.51 KB
/
demo.py
File metadata and controls
111 lines (93 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# ---------------------------------------------------------------------
# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
import torch
from mobile_sam.utils.transforms import ResizeLongestSide
from qai_hub_models.models._shared.sam.app import SAMApp, SAMInputImageLayout
from qai_hub_models.models._shared.sam.utils import show_image
from qai_hub_models.models.mobilesam.model import (
MODEL_ASSET_VERSION,
MODEL_ID,
SMALL_MODEL_TYPE,
MobileSAM,
)
from qai_hub_models.utils.args import (
demo_model_components_from_cli_args,
get_model_cli_parser,
get_on_device_demo_parser,
validate_on_device_demo_args,
)
from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image
from qai_hub_models.utils.evaluate import EvalMode
# Default demo input: a truck photo hosted in the Qualcomm AI Hub asset store,
# versioned alongside the model assets (downloaded lazily on first use).
IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store(
    MODEL_ID, MODEL_ASSET_VERSION, "truck.jpg"
)
# The demo will output image with segmentation mask applied for input points
def main(is_test: bool = False) -> None:
    """Run the MobileSAM point-prompt segmentation demo.

    Parses command-line arguments, loads the encoder/decoder (either the
    local pretrained weights or on-device components, depending on
    ``--eval-mode``), predicts a segmentation mask for the requested point
    prompts, and displays/saves the result.

    Args:
        is_test: When True, parse a fixed argument list (small model type)
            instead of ``sys.argv`` and skip showing the output image.
    """
    # Demo parameters
    parser = get_model_cli_parser(MobileSAM)
    parser.add_argument(
        "--image",
        type=str,
        default=IMAGE_ADDRESS,
        help="image file path or URL",
    )
    parser.add_argument(
        "--point-coordinates",
        type=str,
        # Bug fix: the default previously read "500,575;" while the help
        # text documents `500,375;` — align the default with the help text.
        default="500,375;",
        help="Comma separated x and y coordinate. Multiple coordinate separated by `;`."
        " e.g. `x1,y1;x2,y2`. Default: `500,375;`",
    )
    parser.add_argument(
        "--single-mask-mode",
        # Bug fix: argparse `type=bool` converts any non-empty string
        # (including "False") to True. Parse common falsy spellings
        # explicitly; the default remains True, so existing invocations
        # that never pass the flag are unaffected.
        type=lambda s: str(s).strip().lower() not in ("false", "0", "no"),
        default=True,
        help="If True, returns single mask. For multiple points multiple masks could lead to better results.",
    )
    get_on_device_demo_parser(parser, add_output_dir=True)
    args = parser.parse_args(["--model-type", SMALL_MODEL_TYPE] if is_test else None)
    validate_on_device_demo_args(args, MODEL_ID)

    # Drop empty fragments so a trailing `;` doesn't produce a bogus point.
    coordinates = [coord for coord in args.point_coordinates.split(";") if coord]

    # Load Application: wrap either on-device components or local modules.
    wrapper = MobileSAM.from_pretrained(model_type=args.model_type)
    if args.eval_mode == EvalMode.ON_DEVICE:
        encoder, decoder = demo_model_components_from_cli_args(
            MobileSAM, MODEL_ID, args
        )
    else:
        encoder = wrapper.encoder
        decoder = wrapper.decoder
    app = SAMApp(
        wrapper.sam.image_encoder.img_size,
        wrapper.sam.mask_threshold,
        SAMInputImageLayout[wrapper.sam.image_format],
        [encoder],  # type: ignore[list-item]
        decoder,  # type: ignore[arg-type]
        ResizeLongestSide,
        wrapper.sam.pixel_mean,
    )

    # Load Image
    image = load_image(args.image)

    # Point segmentation using decoder
    print("\n** Performing point segmentation **\n")

    # Parse each `x,y` fragment into integer coordinates; reject anything
    # that is not exactly two comma-separated values.
    input_coords = []
    input_labels = []
    for coord in coordinates:
        coord_split = coord.split(",")
        if len(coord_split) != 2:
            raise RuntimeError(
                f"Expecting comma separated x and y coordinate. Provided {coord_split}."
            )
        input_coords.append([int(coord_split[0]), int(coord_split[1])])
        # Set label to `1` to include current point for segmentation
        input_labels.append(1)

    generated_mask, *_ = app.predict_mask_from_points(
        image, torch.Tensor(input_coords), torch.Tensor(input_labels)
    )
    if not is_test:
        show_image(image, generated_mask, input_coords, args.output_dir)
# Script entry point: only run the demo when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()