Genesis-Embodied-AI
diff --git a/examples/rigid/single_franka_batch_render.py b/examples/rigid/single_franka_batch_render.py
Lines changed: 15 additions & 0 deletions
diff --git a/genesis/engine/bvh.py b/genesis/engine/bvh.py
Lines changed: 27 additions & 15 deletions
@@ -15,6 +15,7 @@ def main():
     parser.add_argument("-r", "--render_all_cameras", action="store_true", default=False)
     parser.add_argument("-o", "--output_dir", type=str, default="img_output/test")
     parser.add_argument("-u", "--use_rasterizer", action="store_true", default=False)
+    parser.add_argument("-d", "--debug", action="store_true", default=False)
     args = parser.parse_args()
 
     ########################## init ##########################
@@ -37,6 +38,14 @@ def main():
     )
 
     ########################## cameras ##########################
+    debug_cam = scene.add_camera(
+        res=(720, 1280),
+        pos=(1.5, -0.5, 1.0),
+        lookat=(0.0, 0.0, 0.5),
+        fov=60,
+        GUI=args.vis,
+        debug=True,
+    )
     cam_0 = scene.add_camera(
         res=(512, 512),
         pos=(1.5, 0.5, 1.5),
@@ -75,14 +84,20 @@ def main():
     # Create an image exporter
     exporter = FrameImageExporter(args.output_dir)
 
+    if args.debug:
+        debug_cam.start_recording()
     for i in range(args.n_steps):
         scene.step()
+        if args.debug:
+            debug_cam.render()
         if args.render_all_cameras:
             rgba, depth, _, _ = scene.render_all_cameras(rgb=True, depth=True)
             exporter.export_frame_all_cameras(i, rgb=rgba, depth=depth)
         else:
             rgba, depth, _, _ = cam_1.render(rgb=True, depth=True)
             exporter.export_frame_single_camera(i, cam_1.idx, rgb=rgba, depth=depth)
+    if args.debug:
+        debug_cam.stop_recording("debug_cam.mp4")
 
 
 if __name__ == "__main__":
 
@@ -235,12 +235,16 @@ def compute_morton_codes(self):
             self.morton_codes[i_b, i_a] = ti.Vector([morton_code, i_a], dt=ti.u32)
 
     @ti.func
-    def expand_bits(self, v):
+    def expand_bits(self, v: ti.u32) -> ti.u32:
         """
         Expands a 10-bit integer into 30 bits by inserting 2 zeros before each bit.
         """
         v = (v * ti.u32(0x00010001)) & ti.u32(0xFF0000FF)
-        v = (v * ti.u32(0x00000101)) & ti.u32(0x0F00F00F)
+        # Same result as v = (v * ti.u32(0x00000101)) & ti.u32(0x0F00F00F), but the top byte
+        # is masked off before the shift so Taichi's debug overflow check stays quiet.
+        # Performance difference is negligible.
+        # See https://github.com/Genesis-Embodied-AI/Genesis/pull/1560 for details
+        v = (v | ((v & ti.u32(0x00FFFFFF)) << 8)) & ti.u32(0x0F00F00F)
         v = (v * ti.u32(0x00000011)) & ti.u32(0xC30C30C3)
         v = (v * ti.u32(0x00000005)) & ti.u32(0x49249249)
         return v
@@ -277,8 +281,8 @@ def _kernel_radix_sort_morton_codes_one_round(self, i: int):
 
         # Reorder morton codes
         for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
-            code = (self.morton_codes[i_b, i_a][1 - (i // 4)] >> ((i % 4) * 8)) & 0xFF
-            idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, ti.i32(code)])
+            code = ti.i32((self.morton_codes[i_b, i_a][1 - (i // 4)] >> ((i % 4) * 8)) & 0xFF)
+            idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, code])
             self.tmp_morton_codes[i_b, idx] = self.morton_codes[i_b, i_a]
 
         # Swap the temporary and original morton codes
@@ -351,21 +355,21 @@ def build_radix_tree(self):
 
             delta_min = self.delta(i, i - d, i_b)
             l_max = ti.u32(2)
-            while self.delta(i, i + l_max * d, i_b) > delta_min:
+            while self.delta(i, i + ti.i32(l_max) * d, i_b) > delta_min:
                 l_max *= 2
             l = ti.u32(0)
 
             t = l_max // 2
             while t > 0:
-                if self.delta(i, i + (l + t) * d, i_b) > delta_min:
+                if self.delta(i, i + ti.i32(l + t) * d, i_b) > delta_min:
                     l += t
                 t //= 2
-            j = i + l * d
+            j = i + ti.i32(l) * d
             delta_node = self.delta(i, j, i_b)
             s = ti.u32(0)
             t = (l + 1) // 2
             while t > 0:
-                if self.delta(i, i + (s + t) * d, i_b) > delta_node:
+                if self.delta(i, i + ti.i32(s + t) * d, i_b) > delta_node:
                     s += t
                 t = ti.select(t > 1, (t + 1) // 2, 0)
 
@@ -378,17 +382,17 @@ def build_radix_tree(self):
             self.nodes[i_b, ti.i32(right)].parent = i
 
     @ti.func
-    def delta(self, i, j, i_b):
+    def delta(self, i: ti.i32, j: ti.i32, i_b: ti.i32):
         """
         Compute the longest common prefix (LCP) of the morton codes of two AABBs.
         """
         result = -1
         if j >= 0 and j < self.n_aabbs:
             result = 64
             for i_bit in range(2):
-                x = self.morton_codes[i_b, ti.i32(i)][i_bit] ^ self.morton_codes[i_b, ti.i32(j)][i_bit]
+                x = self.morton_codes[i_b, i][i_bit] ^ self.morton_codes[i_b, j][i_bit]
                 for b in range(32):
-                    if x & (1 << (31 - b)):
+                    if x & (ti.u32(1) << (31 - b)):
                         result = b + 32 * i_bit
                         break
                 if result != 64:
@@ -441,14 +445,15 @@ def _kernel_compute_bounds_one_layer(self) -> ti.i32:
 
         return is_done
 
-    @ti.kernel
+    @ti.func
     def query(self, aabbs: ti.template()):
         """
         Query the BVH for intersections with the given AABBs.
 
         The results are stored in the query_result field.
         """
         self.query_result_count[None] = 0
+        overflow = False
 
         n_querys = aabbs.shape[1]
         for i_b, i_q in ti.ndrange(self.n_batches, n_querys):
@@ -470,6 +475,8 @@ def query(self, aabbs: ti.template()):
                         idx = ti.atomic_add(self.query_result_count[None], 1)
                         if idx < self.max_n_query_results:
                             self.query_result[idx] = gs.ti_ivec3(i_b, i_a, i_q)  # Store the AABB index
+                        else:
+                            overflow = True
                     else:
                         # Push children onto the stack
                         if node.right != -1:
@@ -479,6 +486,8 @@ def query(self, aabbs: ti.template()):
                             query_stack[stack_depth] = node.left
                             stack_depth += 1
 
+        return overflow
+
 
 @ti.data_oriented
 class FEMSurfaceTetLBVH(LBVH):
@@ -499,10 +508,13 @@ def filter(self, i_a, i_q):
 
         This is used to avoid self-collisions in FEM surface tets.
 
-        i_a: index of the found AABB
-        i_q: index of the query AABB
+        Parameters
+        ----------
+        i_a:
+            index of the found AABB
+        i_q:
+            index of the query AABB
         """
-
         result = i_a >= i_q
         i_av = self.fem_solver.elements_i[self.fem_solver.surface_elements[i_a]].el2v
         i_qv = self.fem_solver.elements_i[self.fem_solver.surface_elements[i_q]].el2v