Skip to content

Commit 20f2588

Browse files
authored
Mm/add isx support (open-edge-platform#2249)
1 parent 7dc8928 commit 20f2588

7 files changed

Lines changed: 163 additions & 59 deletions

File tree

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
[
6+
{
7+
"name": "yolov8n-seg",
8+
"model": "/opt/ros/humble/share/pyrealsense2-ai-demo/multicam-demo/models/yolov8/FP16/yolov8n-seg.xml",
9+
"device": "GPU",
10+
"data_type": "FP16",
11+
"source": "/dev/video-isx031-a-0",
12+
"adapter": "yolov8",
13+
"width": 1920,
14+
"height": 1536
15+
},
16+
{
17+
"name": "yolov8n-seg",
18+
"model": "/opt/ros/humble/share/pyrealsense2-ai-demo/multicam-demo/models/yolov8/FP16/yolov8n-seg.xml",
19+
"device": "CPU",
20+
"data_type": "FP16",
21+
"source": "/dev/video-isx031-b-0",
22+
"adapter": "yolov8",
23+
"width": 1920,
24+
"height": 1536
25+
},
26+
{
27+
"name": "yolov8n",
28+
"model": "/opt/ros/humble/share/pyrealsense2-ai-demo/multicam-demo/models/yolov8/FP16/yolov8n.xml",
29+
"device": "GPU",
30+
"data_type": "FP16",
31+
"source": "/dev/video-isx031-c-0",
32+
"adapter": "yolov8",
33+
"width": 1920,
34+
"height": 1536
35+
},
36+
{
37+
"name": "yolov8n",
38+
"model": "/opt/ros/humble/share/pyrealsense2-ai-demo/multicam-demo/models/yolov8/FP16/yolov8n.xml",
39+
"device": "GPU",
40+
"data_type": "FP16",
41+
"source": "/dev/video-isx031-d-0",
42+
"adapter": "yolov8",
43+
"width": 1920,
44+
"height": 1536
45+
}
46+
]

robotics-ai-suite/components/multicam-demo/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ cython
66
screeninfo
77
dpnp
88
onnx
9+
onnxscript
910
ultralytics==8.0.43
1011
pyrealsense2
1112
openvino-dev
13+
setuptools<=70.0.0

robotics-ai-suite/components/multicam-demo/scripts/generate_ai_models.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ cd ./models/yolov8/ || exit
1616
i=1
1717
status=0
1818
for i in "${yolov8_models[@]}"; do
19-
gen_yolov8_model_cmd=$(python3 ../../src/mo.py --model="$i".pt --data_type="$datatype")
20-
if [[ "$gen_yolov8_model_cmd" -ne 0 ]]
19+
python3 ../../src/mo.py --model="$i".pt --data_type="$datatype"
20+
if [[ $? -ne 0 ]]
2121
then
2222
status=1
2323
break

robotics-ai-suite/components/multicam-demo/src/mo.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@
55

66
import argparse
77
import torch
8+
9+
# PyTorch 2.6+ changed torch.load default to weights_only=True, which blocks
10+
# loading ultralytics checkpoints that contain custom classes. Override it here
11+
# since we trust local model files.
12+
_torch_load_orig = torch.load
13+
def _torch_load_unsafe(*args, **kwargs):
14+
kwargs.setdefault('weights_only', False)
15+
return _torch_load_orig(*args, **kwargs)
16+
torch.load = _torch_load_unsafe
17+
818
from ultralytics import YOLO
919

1020
if __name__ == '__main__':
@@ -17,6 +27,6 @@
1727
half=True if args.data_type=="FP16" else False
1828

1929
model = YOLO(args.model)
20-
model.export(format="openvino", dynamic=True, half=half)
30+
model.export(format="openvino", dynamic=True, half=half, opset=18)
2131

2232

robotics-ai-suite/components/multicam-demo/src/pyrealsense2_ai_demo/images_capture.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ class VideoCapWrapper(ImagesCapture):
114114
def __init__(self, input, loop):
115115
self.loop = loop
116116
self.cap = cv2.VideoCapture()
117+
# Device nodes (e.g. /dev/video-isx031-a-0) are handled by CameraCapWrapper
118+
if input.startswith('/dev/'):
119+
raise InvalidInput("Device path - use CameraCapWrapper: {}".format(input))
117120
status = self.cap.open(input)
118121
if not status:
119122
raise InvalidInput("Can't open the video from {}".format(input))
@@ -186,18 +189,28 @@ class CameraCapWrapper(ImagesCapture):
186189
def __init__(self, input, camera_resolution):
187190

188191
self.cap = cv2.VideoCapture()
192+
# Accept both integer indices ("0") and device paths ("/dev/video-isx031-a-0")
189193
try:
190-
status = self.cap.open(int(input))
191-
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
192-
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_resolution[0])
193-
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_resolution[1])
194-
self.cap.set(cv2.CAP_PROP_FPS, 30)
194+
device = int(input)
195+
except ValueError:
196+
if not os.path.exists(input):
197+
raise InvalidInput("Can't find the camera {}".format(input))
198+
device = input
199+
200+
status = self.cap.open(device)
201+
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
202+
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_resolution[0])
203+
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_resolution[1])
204+
self.cap.set(cv2.CAP_PROP_FPS, 30)
205+
if isinstance(device, int):
206+
# MJPG and autofocus are only applicable to indexed USB cameras
195207
self.cap.set(cv2.CAP_PROP_AUTOFOCUS, 1)
196208
self.cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
197-
if not status:
198-
raise OpenError("Can't open the camera from {}".format(input))
199-
except ValueError:
200-
raise InvalidInput("Can't find the camera {}".format(input))
209+
else:
210+
# ISX031 and similar IPU cameras output UYVY
211+
self.cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'UYVY'))
212+
if not status:
213+
raise OpenError("Can't open the camera from {}".format(input))
201214

202215
def read(self):
203216
status, image = self.cap.read()

robotics-ai-suite/components/multicam-demo/src/pyrealsense2_ai_demo/inference_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818
from . import perf_visualizer as pv
1919

2020
class InferenceManager(Thread):
21-
def __init__(self, model_adapter, input, data_type, async_mode=False):
21+
def __init__(self, model_adapter, input, data_type, async_mode=False, camera_resolution=(1280, 720)):
2222
super().__init__()
2323
self.adapter = model_adapter
2424
self.input = input
2525
self.data_type = data_type
26-
self.cap = VideoCapture(input, True) if input is not None else None
26+
self.cap = VideoCapture(input, True, camera_resolution) if input is not None else None
2727
self.async_mode = async_mode
2828
self.frames_number = 0
2929
self.start_time = None

robotics-ai-suite/components/multicam-demo/src/pyrealsense2_ai_demo_launcher.py

Lines changed: 78 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import cv2
88
import numpy as np
99
import json
10+
import time
1011
import imutils
1112
from yolov8_model import YoloV8Model
1213
import pyrealsense2_ai_demo
@@ -18,74 +19,106 @@
1819
yolov8 = YoloV8Model
1920
)
2021

21-
def run(config_file):
22+
def run(config_file, no_display=False, verbose=False):
2223

23-
config = json.load(open(config_file))
24+
with open(config_file) as f:
25+
raw = '\n'.join(line for line in f if not line.lstrip().startswith('//'))
26+
config = json.loads(raw)
2427

2528
apps = []
2629
for app in config:
2730
adapter = adapters[app["adapter"]]
31+
if verbose:
32+
print(f"[VERBOSE] Loading model: {app['model']} on {app['device']} for source {app['source']}")
2833
model = adapter(app["model"], app["device"], app["name"])
29-
apps.append(InferenceManager(model, app["source"], config[0]["data_type"]))
30-
if len(apps) > MAX_APP:
34+
resolution = (app.get("width", 1280), app.get("height", 720))
35+
if verbose:
36+
print(f"[VERBOSE] Opening camera: {app['source']} at {resolution}")
37+
apps.append(InferenceManager(model, app["source"], app["data_type"], camera_resolution=resolution))
38+
if len(apps) >= MAX_APP:
3139
break;
3240

41+
if verbose:
42+
print(f"[VERBOSE] Starting {len(apps)} inference thread(s)...")
3343
for app in apps:
3444
app.start()
3545

46+
if verbose:
47+
print("[VERBOSE] All threads started. Entering main loop. Press Ctrl+C to stop.")
48+
3649
vis = np.zeros((720, 1280, 3), dtype = np.uint8)
3750
height,width = vis.shape[:2]
3851
margin = 5
39-
cv2.namedWindow("demo", cv2.WND_PROP_FULLSCREEN)
40-
cv2.setWindowProperty("demo", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
52+
if not no_display:
53+
cv2.namedWindow("demo", cv2.WND_PROP_FULLSCREEN)
54+
cv2.setWindowProperty("demo", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
4155
fullScreen = None
42-
num_frames = 0;
43-
while True:
44-
images = []
45-
for app in apps:
46-
img = app.get(1)
47-
if img is not None:
48-
images.append(img)
49-
50-
if len(images) != len(apps):
51-
continue
52-
53-
if len(images) == 1:
54-
vis = images[0]
55-
else:
56-
sh,sw = int(height/2),int(width/2)
57-
for i in range(len(images)):
58-
app_image = imutils.resize(images[i], height=sh-margin)
59-
h,w = app_image.shape[:2]
60-
xoff = int(i%2)*sw + int((sw-w)/2) + int(i%2)*margin
61-
yoff = int(i/2)*sh + int(i/2)*margin
62-
vis[yoff:yoff+h, xoff:xoff+w] = app_image
63-
64-
cv2.imshow("demo", vis)
65-
key = cv2.waitKey(1)
66-
67-
if key in {ord('q'), ord('Q'), 27}:
68-
69-
break
70-
71-
if key == ord('f'):
72-
cv2.setWindowProperty("demo", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN if not fullScreen else cv2.WINDOW_NORMAL)
73-
fullScreen = not fullScreen
74-
75-
num_frames += 1
76-
if fullScreen is None and num_frames > 3:
77-
cv2.setWindowProperty("demo", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_NORMAL)
78-
fullScreen = False
56+
num_frames = 0
57+
last_verbose_time = time.time()
58+
try:
59+
while True:
60+
images = []
61+
for app in apps:
62+
img = app.get(1)
63+
if img is not None:
64+
images.append(img)
65+
66+
if verbose and (time.time() - last_verbose_time) >= 2.0:
67+
last_verbose_time = time.time()
68+
for idx, app in enumerate(apps):
69+
img = app.get()
70+
shape = img.shape if img is not None else None
71+
fps = app.fps() if app.start_time is not None else 0
72+
print(f"[VERBOSE] cam[{idx}] source={app.input} frames={app.frames_number} fps={fps:.1f} last_shape={shape}")
73+
74+
if len(images) != len(apps):
75+
continue
76+
77+
if no_display:
78+
num_frames += 1
79+
if verbose and num_frames % 30 == 0:
80+
print(f"[VERBOSE] {num_frames} composite frames rendered (no-display mode)")
81+
continue
82+
83+
if len(images) == 1:
84+
vis = images[0]
85+
else:
86+
sh,sw = int(height/2),int(width/2)
87+
for i in range(len(images)):
88+
app_image = imutils.resize(images[i], height=sh-margin)
89+
h,w = app_image.shape[:2]
90+
xoff = int(i%2)*sw + int((sw-w)/2) + int(i%2)*margin
91+
yoff = int(i/2)*sh + int(i/2)*margin
92+
vis[yoff:yoff+h, xoff:xoff+w] = app_image
93+
94+
cv2.imshow("demo", vis)
95+
key = cv2.waitKey(1)
96+
97+
if key in {ord('q'), ord('Q'), 27}:
98+
break
99+
100+
if key == ord('f'):
101+
cv2.setWindowProperty("demo", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN if not fullScreen else cv2.WINDOW_NORMAL)
102+
fullScreen = not fullScreen
103+
104+
num_frames += 1
105+
if fullScreen is None and num_frames > 3:
106+
cv2.setWindowProperty("demo", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_NORMAL)
107+
fullScreen = False
108+
except KeyboardInterrupt:
109+
print("\n[INFO] Interrupted by user.")
79110

80111
for app in apps:
81112
app.stop()
82113

83114
if __name__ == '__main__':
84115
parser = argparse.ArgumentParser()
85-
parser.add_argument('--config', default='./config.js', help='confile file')
116+
parser.add_argument('--config', default='./config.js', help='config file')
117+
parser.add_argument('--no-display', action='store_true', help='skip cv2 window rendering')
118+
parser.add_argument('--verbose', action='store_true', help='print per-camera stats every 2 seconds')
86119

87120
args = parser.parse_args()
88121

89-
run(args.config)
122+
run(args.config, no_display=args.no_display, verbose=args.verbose)
90123

91124

0 commit comments

Comments
 (0)