Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
90da431
[BUG FIX] Preserve external forces across sub-steps. (#1290)
LeonLiu4 Jun 20, 2025
0864103
[MISC] Reduce warning messages. (#1294)
YilingQiao Jun 20, 2025
8d07248
[MISC] Update default FEM damping 45.0 -> 0.0 (#1288)
Milotrince Jun 20, 2025
6d9f319
bvh query
Libero0809 Jun 4, 2025
197453b
Wrap aabb with python AABB class
Libero0809 Jun 5, 2025
bf0ee25
move query result batch dim
Libero0809 Jun 6, 2025
d385da6
init
Libero0809 Jun 10, 2025
fe432f0
Update bvh.py
duburcqa Jun 10, 2025
b1a7d2d
tet plane detection
Libero0809 Jun 11, 2025
82ffb7e
worked but far from good
Libero0809 Jun 14, 2025
0d564a2
add damping
Libero0809 Jun 16, 2025
1241c5b
add test
Libero0809 Jun 16, 2025
bd5aa0f
change test fem to sapcoupler
Libero0809 Jun 18, 2025
122e221
change test name
Libero0809 Jun 18, 2025
2778529
code cleanup & bug fix
Libero0809 Jun 18, 2025
54d5646
fix test
Libero0809 Jun 19, 2025
e74be79
fix test
Libero0809 Jun 20, 2025
bf60281
update doc for sap
Libero0809 Jun 20, 2025
604aef5
corotated
Libero0809 Jun 19, 2025
3845104
init
Libero0809 Jun 20, 2025
5cc72a0
fix bug in bvh
Libero0809 Jun 21, 2025
07e2112
Merge branch 'main' into bvh
Libero0809 Jun 25, 2025
2c24e59
add gpu test
Libero0809 Jun 26, 2025
d7889fd
change compute bound to layered reduction
Libero0809 Jun 26, 2025
593892b
Merge branch 'main' into bvh
Libero0809 Jun 26, 2025
d33f8dc
make tests faster
Libero0809 Jun 26, 2025
624eed4
code clean up
Libero0809 Jun 26, 2025
98d30ab
removed unused field
Libero0809 Jun 26, 2025
5fad9b7
code cleanup
Libero0809 Jun 27, 2025
253dc10
Merge branch 'main' into bvh
Libero0809 Jun 27, 2025
f04500c
small bug
Libero0809 Jun 28, 2025
86b6b2a
Merge branch 'main' into bvh
duburcqa Jun 28, 2025
4e70712
Minor fixes.
duburcqa Jun 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 73 additions & 44 deletions genesis/engine/bvh.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import genesis as gs
import taichi as ti
from genesis.repr_base import RBC
import numpy as np


@ti.data_oriented
Expand Down Expand Up @@ -157,19 +158,27 @@ class Node:
# Nodes of the BVH, first n_aabbs - 1 are internal nodes, last n_aabbs are leaf nodes
self.nodes = self.Node.field(shape=(self.n_batches, self.n_aabbs * 2 - 1))
# Whether an internal node has been visited during traversal
self.internal_node_visited = ti.field(ti.u8, shape=(self.n_batches, self.n_aabbs - 1))
self.internal_node_active = ti.field(ti.u1, shape=(self.n_batches, self.n_aabbs - 1))
self.internal_node_ready = ti.field(ti.u1, shape=(self.n_batches, self.n_aabbs - 1))
self.updated = ti.field(ti.u1, shape=())

# Query results, vec3 of batch id, self id, query id
self.query_result = ti.field(gs.ti_ivec3, shape=(self.max_n_query_results))
# Count of query results
self.query_result_count = ti.field(ti.i32, shape=())

@ti.kernel
def build(self):
"""
Build the BVH from the axis-aligned bounding boxes (AABBs).
"""
self.compute_aabb_centers_and_scales()
self.compute_morton_codes()
self.radix_sort_morton_codes()
self.build_radix_tree()
self.compute_bounds()

@ti.kernel
def compute_aabb_centers_and_scales(self):
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
self.aabb_centers[i_b, i_a] = (self.aabbs[i_b, i_a].min + self.aabbs[i_b, i_a].max) / 2

Expand All @@ -184,14 +193,9 @@ def build(self):
for i_b in ti.ndrange(self.n_batches):
scale = self.aabb_max[i_b] - self.aabb_min[i_b]
for i in ti.static(range(3)):
self.scale[i_b][i] = ti.select(scale[i] > 1e-7, 1.0 / scale[i], 1)
self.scale[i_b][i] = ti.select(scale[i] > gs.EPS, 1.0 / scale[i], 1.0)

self.compute_morton_codes()
self.radix_sort_morton_codes()
self.build_radix_tree()
self.compute_bounds()

@ti.func
@ti.kernel
def compute_morton_codes(self):
"""
Compute the Morton codes for each AABB.
Expand Down Expand Up @@ -223,38 +227,43 @@ def expand_bits(self, v):
v = (v * ti.u32(0x00000005)) & ti.u32(0x49249249)
return v

@ti.func
def radix_sort_morton_codes(self):
"""
Radix sort the morton codes, using 8 bits at a time.
"""
for i in ti.static(range(8)):
# Clear histogram
for i_b, j in ti.ndrange(self.n_batches, 256):
self.hist[i_b, j] = 0
for i in range(8):
self._kernel_radix_sort_morton_codes_one_round(i)

# Fill histogram
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
@ti.kernel
def _kernel_radix_sort_morton_codes_one_round(self, i: int):
# Clear histogram
self.hist.fill(0)

# Fill histogram
for i_b in range(self.n_batches):
# This is now sequential
# TODO Parallelize, need to use groups to handle data to remain stable, could be not worth it
for i_a in range(self.n_aabbs):
code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF
self.offset[i_b, i_a] = ti.atomic_add(self.hist[i_b, ti.i32(code)], 1)

# Compute prefix sum
for i_b in ti.ndrange(self.n_batches):
self.prefix_sum[i_b, 0] = 0
for j in range(1, 256): # sequential prefix sum
self.prefix_sum[i_b, j] = self.prefix_sum[i_b, j - 1] + self.hist[i_b, j - 1]
# Compute prefix sum
for i_b in ti.ndrange(self.n_batches):
self.prefix_sum[i_b, 0] = 0
for j in range(1, 256): # sequential prefix sum
Copy link
Contributor

@erizmr erizmr Jun 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though it has been merged, just to mention there is a built-in parallel prefix sum, which might be useful: https://github.com/taichi-dev/taichi/blob/master/python/taichi/algorithms/_algorithms.py#L42

Copy link
Collaborator

@duburcqa duburcqa Jun 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh really! This is interesting!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out. I think if we are doing prefix sum for just 256 elements, sequential would be fine.

self.prefix_sum[i_b, j] = self.prefix_sum[i_b, j - 1] + self.hist[i_b, j - 1]

# Reorder morton codes
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF
idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, ti.i32(code)])
self.tmp_morton_codes[i_b, idx] = self.morton_codes[i_b, i_a]
# Reorder morton codes
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF
idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, ti.i32(code)])
self.tmp_morton_codes[i_b, idx] = self.morton_codes[i_b, i_a]

# Swap the temporary and original morton codes
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
self.morton_codes[i_b, i_a] = self.tmp_morton_codes[i_b, i_a]
# Swap the temporary and original morton codes
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
self.morton_codes[i_b, i_a] = self.tmp_morton_codes[i_b, i_a]

@ti.func
@ti.kernel
def build_radix_tree(self):
"""
Build the radix tree from the sorted morton codes.
Expand Down Expand Up @@ -321,31 +330,51 @@ def delta(self, i, j, i_b):
break
return result

@ti.func
def compute_bounds(self):
"""
Compute the bounds of the BVH nodes.

Starts from the leaf nodes and works upwards.
Starts from the leaf nodes and works upwards layer by layer.
"""
for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1):
self.internal_node_visited[i_b, i] = ti.u8(0)
self._kernel_compute_bounds_init()
while self.updated[None]:
self._kernel_compute_bounds_one_layer()

@ti.kernel
def _kernel_compute_bounds_init(self):
self.updated[None] = True
self.internal_node_active.fill(0)
self.internal_node_ready.fill(0)

for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs):
idx = ti.i32(self.morton_codes[i_b, i])
self.nodes[i_b, i + self.n_aabbs - 1].bound.min = self.aabbs[i_b, idx].min
self.nodes[i_b, i + self.n_aabbs - 1].bound.max = self.aabbs[i_b, idx].max
parent_idx = self.nodes[i_b, i + self.n_aabbs - 1].parent
if parent_idx != -1:
self.internal_node_active[i_b, parent_idx] = 1

cur_idx = self.nodes[i_b, i + self.n_aabbs - 1].parent
while cur_idx != -1:
visited = ti.u1(ti.atomic_or(self.internal_node_visited[i_b, cur_idx], ti.u8(1)))
if not visited:
break
left_bound = self.nodes[i_b, self.nodes[i_b, cur_idx].left].bound
right_bound = self.nodes[i_b, self.nodes[i_b, cur_idx].right].bound
self.nodes[i_b, cur_idx].bound.min = ti.min(left_bound.min, right_bound.min)
self.nodes[i_b, cur_idx].bound.max = ti.max(left_bound.max, right_bound.max)
cur_idx = self.nodes[i_b, cur_idx].parent
@ti.kernel
def _kernel_compute_bounds_one_layer(self):
self.updated[None] = False
for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1):
if self.internal_node_active[i_b, i] == 0:
continue
left_bound = self.nodes[i_b, self.nodes[i_b, i].left].bound
right_bound = self.nodes[i_b, self.nodes[i_b, i].right].bound
self.nodes[i_b, i].bound.min = ti.min(left_bound.min, right_bound.min)
self.nodes[i_b, i].bound.max = ti.max(left_bound.max, right_bound.max)
parent_idx = self.nodes[i_b, i].parent
if parent_idx != -1:
self.internal_node_ready[i_b, parent_idx] = 1
self.internal_node_active[i_b, i] = 0
self.updated[None] = True

for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1):
if self.internal_node_ready[i_b, i] == 0:
continue
self.internal_node_active[i_b, i] = 1
self.internal_node_ready[i_b, i] = 0

@ti.kernel
def query(self, aabbs: ti.template()):
Expand Down
7 changes: 3 additions & 4 deletions genesis/engine/coupler.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,7 @@ def __init__(
self._n_linesearch_iterations = options.n_linesearch_iterations
self._linesearch_c = options.linesearch_c
self._linesearch_tau = options.linesearch_tau
self.default_deformable_g = 1.0e8 # default deformable geometry size

def build(self) -> None:
self._B = self.sim._B
Expand Down Expand Up @@ -698,6 +699,7 @@ def init_fem_fields(self):
self.max_fem_floor_contact_pairs = fem_solver.n_surfaces * fem_solver._B
self.n_fem_floor_contact_pairs = ti.field(gs.ti_int, shape=())
self.fem_floor_contact_pairs = self.fem_floor_contact_pair_type.field(shape=(self.max_fem_floor_contact_pairs,))

# Lookup table for marching tetrahedra edges
kMarchingTetsEdgeTable_np = np.array(
[
Expand Down Expand Up @@ -934,15 +936,12 @@ def fem_floor_detection(self, f: ti.i32):
)
self.fem_floor_contact_pairs[i_c].barycentric = barycentric

C = ti.static(1.0e8)
deformable_g = C
rigid_g = self.fem_pressure_gradient[i_b, i_e].z
# TODO A better way to handle corner cases where pressure and pressure gradient are ill defined
if total_area < gs.EPS or rigid_g < gs.EPS:
self.fem_floor_contact_pairs[i_c].active = 0
continue
g = 1.0 / (1.0 / deformable_g + 1.0 / rigid_g) # harmonic average
deformable_k = total_area * C
g = self.default_deformable_g * rigid_g / (self.default_deformable_g + rigid_g) # harmonic average
rigid_k = total_area * g
rigid_phi0 = -pressure / g
rigid_fn0 = total_area * pressure
Expand Down
1 change: 1 addition & 0 deletions genesis/engine/materials/FEM/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def build_linear_corotated(self, fem_solver):

@ti.func
def pre_compute_linear_corotated(self, J, F, i_e, i_b):
# Computing Polar Decomposition instead of calling `R, P = ti.polar_decompose(F)` since `P` is not needed here
U, S, V = ti.svd(F)
R = U @ V.transpose()
self.R[i_b, i_e] = R
Expand Down
26 changes: 26 additions & 0 deletions genesis/engine/solvers/fem_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,32 @@ def _func_compute_ele_energy(self, f: ti.i32):

self.elements_el_energy[i_b, i_e].energy += 0.5 * damping_beta_over_dt * St_x_diff.dot(H_St_x_diff)

# add linearized damping energy
if self._damping_beta > gs.EPS:
damping_beta_over_dt = self._damping_beta / self._substep_dt
i_v = self.elements_i[i_e].el2v
S = ti.Matrix.zero(gs.ti_float, 4, 3)
B = self.elements_i[i_e].B
S[:3, :] = B
S[3, :] = -B[0, :] - B[1, :] - B[2, :]

x_diff = ti.Vector.zero(gs.ti_float, 12)
for i in ti.static(range(4)):
x_diff[i * 3 : i * 3 + 3] = (
self.elements_v[f + 1, i_v[i], i_b].pos - self.elements_v[f, i_v[i], i_b].pos
)
St_x_diff = ti.Vector.zero(gs.ti_float, 9)
for i, j in ti.static(ti.ndrange(3, 4)):
St_x_diff[i * 3 : i * 3 + 3] += S[j, i] * x_diff[j * 3 : j * 3 + 3]

H_St_x_diff = ti.Vector.zero(gs.ti_float, 9)
for i, j in ti.static(ti.ndrange(3, 3)):
H_St_x_diff[i * 3 : i * 3 + 3] += (
self.elements_el_hessian[i_b, i, j, i_e] @ St_x_diff[j * 3 : j * 3 + 3]
)

self.elements_el_energy[i_b, i_e].energy += 0.5 * damping_beta_over_dt * St_x_diff.dot(H_St_x_diff)

@ti.kernel
def accumulate_vertex_force_preconditioner(self, f: ti.i32):
damping_alpha_dt = self._damping_alpha * self._substep_dt
Expand Down
12 changes: 9 additions & 3 deletions tests/test_bvh.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@
def lbvh():
"""Fixture for a LBVH tree"""

n_aabbs = 20
n_aabbs = 500
n_batches = 10
aabb = AABB(n_batches=n_batches, n_aabbs=n_aabbs)
min = np.random.rand(n_batches, n_aabbs, 3).astype(np.float32)
min = np.random.rand(n_batches, n_aabbs, 3).astype(np.float32) * 20.0
max = min + np.random.rand(n_batches, n_aabbs, 3).astype(np.float32)

aabb.aabbs.min.from_numpy(min)
aabb.aabbs.max.from_numpy(max)

lbvh = LBVH(aabb)
lbvh = LBVH(aabb, max_n_query_result_per_aabb=32)
lbvh.build()
return lbvh

Expand Down Expand Up @@ -70,6 +70,7 @@ def test_expand_bits():
), f"Expected {str_expanded_x}, got {''.join(f'00{bit}' for bit in str_x)}"


@pytest.mark.parametrize("backend", [gs.cpu, gs.gpu])
def test_build_tree(lbvh):
nodes = lbvh.nodes.to_numpy()
n_aabbs = lbvh.n_aabbs
Expand Down Expand Up @@ -116,13 +117,18 @@ def test_build_tree(lbvh):
assert_allclose(parent_max, parent_max_expected, atol=1e-6, rtol=1e-5)


@pytest.mark.parametrize("backend", [gs.cpu, gs.gpu])
def test_query(lbvh):
aabbs = lbvh.aabbs

# Query the tree
lbvh.query(aabbs)

query_result_count = lbvh.query_result_count.to_numpy()
if query_result_count > lbvh.max_n_query_results:
raise ValueError(
f"Query result count {query_result_count} exceeds max_n_query_results {lbvh.max_n_query_results}"
)
query_result = lbvh.query_result.to_numpy()

n_aabbs = lbvh.n_aabbs
Expand Down