diff --git a/genesis/engine/bvh.py b/genesis/engine/bvh.py index bf84e107e4..79b5c273c3 100644 --- a/genesis/engine/bvh.py +++ b/genesis/engine/bvh.py @@ -235,12 +235,16 @@ def compute_morton_codes(self): self.morton_codes[i_b, i_a] = ti.Vector([morton_code, i_a], dt=ti.u32) @ti.func - def expand_bits(self, v): + def expand_bits(self, v: ti.u32) -> ti.u32: """ Expands a 10-bit integer into 30 bits by inserting 2 zeros before each bit. """ v = (v * ti.u32(0x00010001)) & ti.u32(0xFF0000FF) - v = (v * ti.u32(0x00000101)) & ti.u32(0x0F00F00F) + # This is to silence taichi debug warning of overflow + # Has the same result as v = (v * ti.u32(0x00000101)) & ti.u32(0x0F00F00F) + # Performance difference is negligible + # See https://github.com/Genesis-Embodied-AI/Genesis/pull/1560 for details + v = (v | ((v & 0x00FFFFFF) << 8)) & 0x0F00F00F v = (v * ti.u32(0x00000011)) & ti.u32(0xC30C30C3) v = (v * ti.u32(0x00000005)) & ti.u32(0x49249249) return v @@ -351,21 +355,21 @@ def build_radix_tree(self): delta_min = self.delta(i, i - d, i_b) l_max = ti.u32(2) - while self.delta(i, i + l_max * d, i_b) > delta_min: + while self.delta(i, i + ti.i32(l_max) * d, i_b) > delta_min: l_max *= 2 l = ti.u32(0) t = l_max // 2 while t > 0: - if self.delta(i, i + (l + t) * d, i_b) > delta_min: + if self.delta(i, i + ti.i32(l + t) * d, i_b) > delta_min: l += t t //= 2 - j = i + l * d + j = i + ti.i32(l) * d delta_node = self.delta(i, j, i_b) s = ti.u32(0) t = (l + 1) // 2 while t > 0: - if self.delta(i, i + (s + t) * d, i_b) > delta_node: + if self.delta(i, i + ti.i32(s + t) * d, i_b) > delta_node: s += t t = ti.select(t > 1, (t + 1) // 2, 0) @@ -378,7 +382,7 @@ def build_radix_tree(self): self.nodes[i_b, ti.i32(right)].parent = i @ti.func - def delta(self, i, j, i_b): + def delta(self, i: ti.i32, j: ti.i32, i_b: ti.i32): """ Compute the longest common prefix (LCP) of the morton codes of two AABBs. """ @@ -386,9 +390,9 @@ def delta(self, i, j, i_b): if j >= 0 and j < self.n_aabbs: result = 64 for i_bit in range(2): - x = self.morton_codes[i_b, ti.i32(i)][i_bit] ^ self.morton_codes[i_b, ti.i32(j)][i_bit] + x = self.morton_codes[i_b, i][i_bit] ^ self.morton_codes[i_b, j][i_bit] for b in range(32): - if x & (1 << (31 - b)): + if x & (ti.u32(1) << (31 - b)): result = b + 32 * i_bit break if result != 64: diff --git a/genesis/engine/couplers/sap_coupler.py b/genesis/engine/couplers/sap_coupler.py index 2fb0d10c4b..a832b586be 100644 --- a/genesis/engine/couplers/sap_coupler.py +++ b/genesis/engine/couplers/sap_coupler.py @@ -655,7 +655,7 @@ def compute_inertia_elastic_gradient_alpha(self, i_step: ti.i32): for i_b, i_v in ti.ndrange(self._B, self.fem_solver.n_vertices): if not self.batch_linesearch_active[i_b]: continue - self.linesearch_state.dell_dalpha[i_b] += dp[i_b, i_v].dot(v[i_b, i_v] - v_star[i_step + 1, i_b, i_v]) + self.linesearch_state.dell_dalpha[i_b] += dp[i_b, i_v].dot(v[i_b, i_v] - v_star[i_step + 1, i_v, i_b]) @ti.kernel def compute_inertia_elastic_hessian_alpha(self): @@ -679,7 +679,7 @@ def compute_inertia_elastic_energy_alpha(self, i_step: ti.i32, energy: ti.templa for i_b, i_v in ti.ndrange(self._B, self.fem_solver.n_vertices): if not self.batch_linesearch_active[i_b]: continue - energy[i_b] += alpha[i_b] * dp[i_b, i_v].dot(v[i_b, i_v] - v_star[i_step + 1, i_b, i_v]) + energy[i_b] += alpha[i_b] * dp[i_b, i_v].dot(v[i_b, i_v] - v_star[i_step + 1, i_v, i_b]) def prepare_search_direction_data(self): self.prepare_inertia_elastic_search_direction_data()