Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
venv/
notes.txt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you delete the pycache files and gitignore the folder

53 changes: 53 additions & 0 deletions Basalt_VIO_RTab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import time
import depthai as dai
from rerun_node import RerunNode

# Create pipeline
def add_basalt_vio_rtab(p: dai.Pipeline):
with p:
fps = 60
width = 640
height = 400
# Define sources and outputs
left = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B, sensorFps=fps)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing I'd look into is if you can instantiate the same sources (predominantly the three cameras) only once and/or if this is an issue in either direction

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As in if we can instantiate each camera 0 or multiple times?

Copy link
Contributor

@honzikschenk honzikschenk Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example: the left mono camera gets used in both the object localization and VSLAM pipelines. If we could p.create left once and then feed it into both pipelines, that would presumably be more efficient

right = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C, sensorFps=fps)
imu = p.create(dai.node.IMU)
odom = p.create(dai.node.BasaltVIO)
slam = p.create(dai.node.RTABMapSLAM)
stereo = p.create(dai.node.StereoDepth)
params = {"RGBD/CreateOccupancyGrid": "true",
"Grid/3D": "true",
"Rtabmap/SaveWMState": "true"}
slam.setParams(params)

rerunViewer = RerunNode()
imu.enableIMUSensor([dai.IMUSensor.ACCELEROMETER_RAW, dai.IMUSensor.GYROSCOPE_RAW], 200)
imu.setBatchReportThreshold(1)
imu.setMaxBatchReports(10)

stereo.setExtendedDisparity(False)
stereo.setLeftRightCheck(True)
stereo.setSubpixel(True)
stereo.setRectifyEdgeFillColor(0)
stereo.enableDistortionCorrection(True)
stereo.initialConfig.setLeftRightCheckThreshold(10)
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_B)


left.requestOutput((width, height)).link(stereo.left)
right.requestOutput((width, height)).link(stereo.right)
stereo.syncedLeft.link(odom.left)
stereo.syncedRight.link(odom.right)
stereo.depth.link(slam.depth)
stereo.rectifiedLeft.link(slam.rect)
imu.out.link(odom.imu)

odom.transform.link(slam.odom)
slam.transform.link(rerunViewer.inputTrans)
slam.passthroughRect.link(rerunViewer.inputImg)
slam.occupancyGridMap.link(rerunViewer.inputGrid)
slam.obstaclePCL.link(rerunViewer.inputObstaclePCL)
slam.groundPCL.link(rerunViewer.inputGroundPCL)
p.start()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should only start this in the main module once

while p.isRunning():
time.sleep(1)
100 changes: 100 additions & 0 deletions object_tracker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env python3

import cv2
import depthai as dai
import time


fullFrameTracking = False

# Create pipeline
def add_object_tracker(pipeline: dai.Pipeline):
with pipeline:
# Define sources and outputs
camRgb = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
monoLeft = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
monoRight = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)

stereo = pipeline.create(dai.node.StereoDepth)
leftOutput = monoLeft.requestOutput((640, 400))
rightOutput = monoRight.requestOutput((640, 400))
leftOutput.link(stereo.left)
rightOutput.link(stereo.right)

spatialDetectionNetwork = pipeline.create(dai.node.SpatialDetectionNetwork).build(camRgb, stereo, "yolov6-nano")
objectTracker = pipeline.create(dai.node.ObjectTracker)

spatialDetectionNetwork.setConfidenceThreshold(0.6)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)
labelMap = spatialDetectionNetwork.getClasses()

objectTracker.setDetectionLabelsToTrack([0]) # track only person
# possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
objectTracker.setTrackerType(dai.TrackerType.SHORT_TERM_IMAGELESS)
# take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID
objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.SMALLEST_ID)

preview = objectTracker.passthroughTrackerFrame.createOutputQueue()
tracklets = objectTracker.out.createOutputQueue()

if fullFrameTracking:
camRgb.requestFullResolutionOutput().link(objectTracker.inputTrackerFrame)
# do not block the pipeline if it's too slow on full frame
objectTracker.inputTrackerFrame.setBlocking(False)
objectTracker.inputTrackerFrame.setMaxSize(1)
else:
spatialDetectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)

spatialDetectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
spatialDetectionNetwork.out.link(objectTracker.inputDetections)

startTime = time.monotonic()
counter = 0
fps = 0
color = (255, 255, 255)
pipeline.start()
while(pipeline.isRunning()):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can't capture the program here as it's just meant to be a quick add to the pipeline function

imgFrame = preview.get()
track = tracklets.get()
assert isinstance(imgFrame, dai.ImgFrame), "Expected ImgFrame"
assert isinstance(track, dai.Tracklets), "Expected Tracklets"

counter+=1
current_time = time.monotonic()
if (current_time - startTime) > 1 :
fps = counter / (current_time - startTime)
counter = 0
startTime = current_time

frame = imgFrame.getCvFrame()
trackletsData = track.tracklets
for t in trackletsData:
roi = t.roi.denormalize(frame.shape[1], frame.shape[0])
x1 = int(roi.topLeft().x)
y1 = int(roi.topLeft().y)
x2 = int(roi.bottomRight().x)
y2 = int(roi.bottomRight().y)

try:
label = labelMap[t.label]
except:
label = t.label

cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
cv2.putText(frame, f"ID: {[t.id]}", (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
cv2.putText(frame, t.status.name, (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, cv2.FONT_HERSHEY_SIMPLEX)

cv2.putText(frame, f"X: {int(t.spatialCoordinates.x)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
cv2.putText(frame, f"Y: {int(t.spatialCoordinates.y)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
cv2.putText(frame, f"Z: {int(t.spatialCoordinates.z)} mm", (x1 + 10, y1 + 95), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)

cv2.imshow("tracker", frame)

if cv2.waitKey(1) == ord('q'):
break
13 changes: 13 additions & 0 deletions pipeline-combine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import time

import depthai as dai

import Basalt_VIO_RTab
import object_tracker
import rerun_node
# This is the main pipeline-combine.py file that integrates Basalt VIO with RTAB-Map SLAM



with dai.Pipeline() as pipeline:

object_tracker.add_object_tracker(pipeline)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very very small nit but if you could make the two functions formatted the same, that'd be much appreciated

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On this note, please rename all the files to be consistent with the rerun_node.py file (snake case)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should also pass in the camerabundle here

Basalt_VIO_RTab.add_basalt_vio_rtab(pipeline)
pipeline.start()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You'll have to add a while loop (ex: while pipeline.isRunning()) that persists for a designated amount of time (or until the user stops the program) and then pipeline.stop()

Here's an example of this:

with dai.Pipeline() as pipeline:
    object_tracker.add_object_tracker(pipeline)
    Basalt_VIO_RTab.add_basalt_vio_rtab(pipeline)
    pipeline.start()
    while pipeline.isRunning():
        if [END_CONDITION]:
            pipeline.stop()
            break

3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
depthai
rerun
opencv-python
66 changes: 66 additions & 0 deletions rerun_node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import depthai as dai
import sys

from pathlib import Path
installExamplesStr = Path(__file__).absolute().parents[2] / 'install_requirements.py --install_rerun'
try:
import rerun as rr
except ImportError:
sys.exit("Critical dependency missing: Rerun. Please install it using the command: '{} {}' and then rerun the script.".format(sys.executable, installExamplesStr))

import cv2

class RerunNode(dai.node.ThreadedHostNode):
def __init__(self):
dai.node.ThreadedHostNode.__init__(self)
self.inputTrans = dai.Node.Input(self)
self.inputImg = dai.Node.Input(self)
self.inputObstaclePCL = dai.Node.Input(self)
self.inputGroundPCL = dai.Node.Input(self)
self.inputGrid = dai.Node.Input(self)
self.positions = []
self.fx = 400.0
self.fy = 400.0
self.intrinsicsSet = False

def getFocalLengthFromImage(self, imgFrame):
p = self.getParentPipeline()
calibHandler = p.getDefaultDevice().readCalibration()
intrinsics = calibHandler.getCameraIntrinsics(dai.CameraBoardSocket(imgFrame.getInstanceNum()), imgFrame.getWidth(), imgFrame.getHeight())
self.fx = intrinsics[0][0]
self.fy = intrinsics[1][1]
self.intrinsicsSet = True


def run(self):
rr.init("", spawn=True)
rr.log("world", rr.ViewCoordinates.FLU)
rr.log("world/ground", rr.Boxes3D(half_sizes=[3.0, 3.0, 0.00001]))
while self.isRunning():
transData = self.inputTrans.get()
imgFrame = self.inputImg.get()
if not self.intrinsicsSet:
self.getFocalLengthFromImage(imgFrame)
pclObstData = self.inputObstaclePCL.tryGet()
pclGrndData = self.inputGroundPCL.tryGet()
mapData = self.inputGrid.tryGet()
if transData is not None:
trans = transData.getTranslation()
quat = transData.getQuaternion()
position = rr.datatypes.Vec3D([trans.x, trans.y, trans.z])
rr.log("world/camera", rr.Transform3D(translation=position, rotation=rr.datatypes.Quaternion(xyzw=[quat.qx, quat.qy, quat.qz, quat.qw])))
self.positions.append(position)
lineStrip = rr.components.LineStrip3D(self.positions)
rr.log("world/trajectory", rr.LineStrips3D(lineStrip))
rr.log("world/camera/image", rr.Pinhole(resolution=[imgFrame.getWidth(), imgFrame.getHeight()], focal_length=[self.fx, self.fy], camera_xyz=rr.ViewCoordinates.FLU))
img = imgFrame.getCvFrame()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
rr.log("world/camera/image/rgb", rr.Image(img))
if pclObstData is not None:
points, colors = pclObstData.getPointsRGB()
rr.log("world/obstacle_pcl", rr.Points3D(points, colors=colors, radii=[0.01]))
if pclGrndData is not None:
points, colors = pclGrndData.getPointsRGB()
rr.log("world/ground_pcl", rr.Points3D(points, colors=colors, radii=[0.01]))
if mapData is not None:
rr.log("map", rr.Image(mapData.getCvFrame()))