UWARG · samcyx · Jan 24, 2026 · Jan 24, 2026 · Jan 26, 2026 · Jan 28, 2026
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,5 @@ venv/
 
 # Logging
 logs/
+
+notes.txt
diff --git a/Basalt_VIO_RTab.py b/Basalt_VIO_RTab.py
@@ -0,0 +1,51 @@
+import depthai as dai
+from cameraBundle import CameraBundle
+
+
+# Create pipeline
+def add_basalt_vio_rtab(p: dai.Pipeline, cameras: "CameraBundle" = None):
+    with p:
+        # Ensure we have a CameraBundle (created only if not provided)
+        cameras = cameras or CameraBundle(p)
+        fps = 60
+        width = 640
+        height = 400
+        # Define sources and outputs
+
+        left = cameras.monoLeft
+        right = cameras.monoRight
+        imu = p.create(dai.node.IMU)
+        odom = p.create(dai.node.BasaltVIO)
+        slam = cameras.slam
+        stereo = cameras.stereo
+        params = {
+            "RGBD/CreateOccupancyGrid": "true",
+            "Grid/3D": "true",
+            "Rtabmap/SaveWMState": "true",
+        }
+        slam.setParams(params)
+
+        imu.enableIMUSensor(
+            [dai.IMUSensor.ACCELEROMETER_RAW, dai.IMUSensor.GYROSCOPE_RAW], 200
+        )
+        imu.setBatchReportThreshold(1)
+        imu.setMaxBatchReports(10)
+
+        stereo.setExtendedDisparity(False)
+        stereo.setLeftRightCheck(True)
+        stereo.setSubpixel(True)
+        stereo.setRectifyEdgeFillColor(0)
+        stereo.enableDistortionCorrection(True)
+        stereo.initialConfig.setLeftRightCheckThreshold(10)
+        stereo.setDepthAlign(dai.CameraBoardSocket.CAM_B)
+
+        left.requestOutput((width, height)).link(stereo.left)
+        right.requestOutput((width, height)).link(stereo.right)
+        stereo.syncedLeft.link(odom.left)
+        stereo.syncedRight.link(odom.right)
+        stereo.depth.link(slam.depth)
+        stereo.rectifiedLeft.link(slam.rect)
+        imu.out.link(odom.imu)
+
+        odom.transform.link(slam.odom)
+
diff --git a/cameraBundle.py b/cameraBundle.py
@@ -0,0 +1,25 @@
+import depthai as dai
+
+
+class CameraBundle:
+    """Helper to create and link camera nodes and stereo outputs."""
+
+    def __init__(
+        self,
+        pipeline: dai.Pipeline,
+        rgb_socket=dai.CameraBoardSocket.CAM_A,
+        left_socket=dai.CameraBoardSocket.CAM_B,
+        right_socket=dai.CameraBoardSocket.CAM_C,
+        mono_resolution=(640, 400),
+    ):
+        self.pipeline = pipeline
+        self.camRgb = pipeline.create(dai.node.Camera).build(rgb_socket)
+        self.monoLeft = pipeline.create(dai.node.Camera).build(left_socket)
+        self.monoRight = pipeline.create(dai.node.Camera).build(right_socket)
+        self.slam = pipeline.create(dai.node.RTABMapSLAM)
+
+        self.stereo = pipeline.create(dai.node.StereoDepth)
+        self.leftOutput = self.monoLeft.requestOutput(mono_resolution)
+        self.rightOutput = self.monoRight.requestOutput(mono_resolution)
+        self.leftOutput.link(self.stereo.left)
+        self.rightOutput.link(self.stereo.right)
diff --git a/object_tracker.py b/object_tracker.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+
+import depthai as dai
+from cameraBundle import CameraBundle
+
+fullFrameTracking = False
+
+# Create pipeline
+
+
+def add_object_tracker(p: dai.Pipeline, cameras: "CameraBundle" = None):
+    with p:
+        cameras = cameras or CameraBundle(p)
+        camRgb = cameras.camRgb
+        stereo = cameras.stereo
+
+        spatialDetectionNetwork = p.create(
+            dai.node.SpatialDetectionNetwork
+        ).build(camRgb, stereo, "yolov6-nano")
+        objectTracker = p.create(dai.node.ObjectTracker)
+
+        spatialDetectionNetwork.setConfidenceThreshold(0.6)
+        spatialDetectionNetwork.input.setBlocking(False)
+        spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
+        spatialDetectionNetwork.setDepthLowerThreshold(100)
+        spatialDetectionNetwork.setDepthUpperThreshold(5000)
+
+        objectTracker.setDetectionLabelsToTrack([0])  # track only person
+        # possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
+        objectTracker.setTrackerType(dai.TrackerType.SHORT_TERM_IMAGELESS)
+        # take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID
+        objectTracker.setTrackerIdAssignmentPolicy(
+            dai.TrackerIdAssignmentPolicy.SMALLEST_ID
+        )
+
+        if fullFrameTracking:
+            camRgb.requestFullResolutionOutput().link(objectTracker.inputTrackerFrame)
+            # do not block the pipeline if it's too slow on full frame
+            objectTracker.inputTrackerFrame.setBlocking(False)
+            objectTracker.inputTrackerFrame.setMaxSize(1)
+        else:
+            spatialDetectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)
+
+        spatialDetectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
+        spatialDetectionNetwork.out.link(objectTracker.inputDetections)
diff --git a/pipeline-combine.py b/pipeline-combine.py
@@ -0,0 +1,28 @@
+import Basalt_VIO_RTab
+from rerun_node import RerunNode
+import object_tracker
+import depthai as dai
+import time
+from cameraBundle import CameraBundle
+# This is the main pipeline-combine.py file that integrates Basalt VIO with RTAB-Map SLAM
+
+
+with dai.Pipeline() as pipeline:
+    cameraBundle = CameraBundle(pipeline)
+    object_tracker.add_object_tracker(pipeline)
+    Basalt_VIO_RTab.add_basalt_vio_rtab(pipeline, cameraBundle)
+    rerunViewer = RerunNode()
+    slam = cameraBundle.slam
+    slam.transform.link(rerunViewer.inputTrans)
+    slam.passthroughRect.link(rerunViewer.inputImg)
+    slam.occupancyGridMap.link(rerunViewer.inputGrid)
+    slam.obstaclePCL.link(rerunViewer.inputObstaclePCL)
+    slam.groundPCL.link(rerunViewer.inputGroundPCL)
+    pipeline.start()
+    try:
+        while pipeline.isRunning():
+            time.sleep(0.1)
+    except KeyboardInterrupt:
+        pass
+    finally:
+        pipeline.stop()
diff --git a/rerun_node.py b/rerun_node.py
@@ -0,0 +1,98 @@
+import depthai as dai
+import sys
+
+from pathlib import Path
+
+installExamplesStr = (
+    Path(__file__).absolute().parents[2] / "install_requirements.py --install_rerun"
+)
+try:
+    import rerun as rr
+except ImportError:
+    sys.exit(
+        "Critical dependency missing: Rerun. Please install it using the command: '{} {}' and then rerun the script.".format(
+            sys.executable, installExamplesStr
+        )
+    )
+
+import cv2
+
+
+class RerunNode(dai.node.ThreadedHostNode):
+    def __init__(self):
+        dai.node.ThreadedHostNode.__init__(self)
+        self.inputTrans = dai.Node.Input(self)
+        self.inputImg = dai.Node.Input(self)
+        self.inputObstaclePCL = dai.Node.Input(self)
+        self.inputGroundPCL = dai.Node.Input(self)
+        self.inputGrid = dai.Node.Input(self)
+        self.positions = []
+        self.fx = 400.0
+        self.fy = 400.0
+        self.intrinsicsSet = False
+
+    def getFocalLengthFromImage(self, imgFrame):
+        p = self.getParentPipeline()
+        calibHandler = p.getDefaultDevice().readCalibration()
+        intrinsics = calibHandler.getCameraIntrinsics(
+            dai.CameraBoardSocket(imgFrame.getInstanceNum()),
+            imgFrame.getWidth(),
+            imgFrame.getHeight(),
+        )
+        self.fx = intrinsics[0][0]
+        self.fy = intrinsics[1][1]
+        self.intrinsicsSet = True
+
+    def run(self):
+        rr.init("", spawn=True)
+        rr.log("world", rr.ViewCoordinates.FLU)
+        rr.log("world/ground", rr.Boxes3D(half_sizes=[3.0, 3.0, 0.00001]))
+        while self.isRunning():
+            transData = self.inputTrans.get()
+            imgFrame = self.inputImg.get()
+            if not self.intrinsicsSet:
+                self.getFocalLengthFromImage(imgFrame)
+            pclObstData = self.inputObstaclePCL.tryGet()
+            pclGrndData = self.inputGroundPCL.tryGet()
+            mapData = self.inputGrid.tryGet()
+            if transData is not None:
+                trans = transData.getTranslation()
+                quat = transData.getQuaternion()
+                position = rr.datatypes.Vec3D([trans.x, trans.y, trans.z])
+                rr.log(
+                    "world/camera",
+                    rr.Transform3D(
+                        translation=position,
+                        rotation=rr.datatypes.Quaternion(
+                            xyzw=[quat.qx, quat.qy, quat.qz, quat.qw]
+                        ),
+                    ),
+                )
+                self.positions.append(position)
+                lineStrip = rr.components.LineStrip3D(self.positions)
+                rr.log("world/trajectory", rr.LineStrips3D(lineStrip))
+                rr.log(
+                    "world/camera/image",
+                    rr.Pinhole(
+                        resolution=[imgFrame.getWidth(), imgFrame.getHeight()],
+                        focal_length=[self.fx, self.fy],
+                        camera_xyz=rr.ViewCoordinates.FLU,
+                    ),
+                )
+                img = imgFrame.getCvFrame()
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+                rr.log("world/camera/image/rgb", rr.Image(img))
+                if pclObstData is not None:
+                    points, colors = pclObstData.getPointsRGB()
+                    rr.log(
+                        "world/obstacle_pcl",
+                        rr.Points3D(points, colors=colors, radii=[0.01]),
+                    )
+                if pclGrndData is not None:
+                    points, colors = pclGrndData.getPointsRGB()
+                    rr.log(
+                        "world/ground_pcl",
+                        rr.Points3D(points, colors=colors, radii=[0.01]),
+                    )
+                if mapData is not None:
+                    rr.log("map", rr.Image(mapData.getCvFrame()))