@@ -235,12 +235,16 @@ def compute_morton_codes(self):
235235 self .morton_codes [i_b , i_a ] = ti .Vector ([morton_code , i_a ], dt = ti .u32 )
236236
@ti.func
def expand_bits(self, v: ti.u32) -> ti.u32:
    """
    Expands a 10-bit integer into 30 bits by inserting 2 zeros before each bit.

    The value is spread out in four steps; each step duplicates the current
    bit pattern at a fixed offset and masks away the copies that are not
    wanted, so the input bits end up three positions apart.

    Parameters
    ----------
    v : ti.u32
        Value to expand. It is expected to fit in 10 bits.

    Returns
    -------
    ti.u32
        The expanded value, restricted to the bits of mask 0x49249249.
    """
    v = (v * ti.u32(0x00010001)) & ti.u32(0xFF0000FF)
    # Same result as v = (v * ti.u32(0x00000101)) & ti.u32(0x0F00F00F), but
    # written as shift + OR so the Taichi debug-mode overflow check stays silent.
    # Why it is equivalent: after the previous mask v only has bits inside
    # 0xFF0000FF, so v and (v << 8) never share a set bit and '+' equals '|'.
    # The top byte is cleared before shifting because those bits would leave
    # the 32-bit range anyway. Performance difference is negligible.
    # See https://github.com/Genesis-Embodied-AI/Genesis/pull/1560 for details
    v = (v | ((v & ti.u32(0x00FFFFFF)) << 8)) & ti.u32(0x0F00F00F)
    v = (v * ti.u32(0x00000011)) & ti.u32(0xC30C30C3)
    v = (v * ti.u32(0x00000005)) & ti.u32(0x49249249)
    return v
@@ -351,21 +355,21 @@ def build_radix_tree(self):
351355
352356 delta_min = self .delta (i , i - d , i_b )
353357 l_max = ti .u32 (2 )
354- while self .delta (i , i + l_max * d , i_b ) > delta_min :
358+ while self .delta (i , i + ti . i32 ( l_max ) * d , i_b ) > delta_min :
355359 l_max *= 2
356360 l = ti .u32 (0 )
357361
358362 t = l_max // 2
359363 while t > 0 :
360- if self .delta (i , i + (l + t ) * d , i_b ) > delta_min :
364+ if self .delta (i , i + ti . i32 (l + t ) * d , i_b ) > delta_min :
361365 l += t
362366 t //= 2
363- j = i + l * d
367+ j = i + ti . i32 ( l ) * d
364368 delta_node = self .delta (i , j , i_b )
365369 s = ti .u32 (0 )
366370 t = (l + 1 ) // 2
367371 while t > 0 :
368- if self .delta (i , i + (s + t ) * d , i_b ) > delta_node :
372+ if self .delta (i , i + ti . i32 (s + t ) * d , i_b ) > delta_node :
369373 s += t
370374 t = ti .select (t > 1 , (t + 1 ) // 2 , 0 )
371375
@@ -378,17 +382,17 @@ def build_radix_tree(self):
378382 self .nodes [i_b , ti .i32 (right )].parent = i
379383
380384 @ti .func
381- def delta (self , i , j , i_b ):
385+ def delta (self , i : ti . i32 , j : ti . i32 , i_b : ti . i32 ):
382386 """
383387 Compute the longest common prefix (LCP) of the morton codes of two AABBs.
384388 """
385389 result = - 1
386390 if j >= 0 and j < self .n_aabbs :
387391 result = 64
388392 for i_bit in range (2 ):
389- x = self .morton_codes [i_b , ti . i32 ( i ) ][i_bit ] ^ self .morton_codes [i_b , ti . i32 ( j ) ][i_bit ]
393+ x = self .morton_codes [i_b , i ][i_bit ] ^ self .morton_codes [i_b , j ][i_bit ]
390394 for b in range (32 ):
391- if x & (1 << (31 - b )):
395+ if x & (ti . u32 ( 1 ) << (31 - b )):
392396 result = b + 32 * i_bit
393397 break
394398 if result != 64 :
0 commit comments