Skip to content

Commit decef89

Browse files
authored
Merge branch 'main' into feature/urdf
2 parents 1590625 + 95c5a3e commit decef89

40 files changed

+1144
-424
lines changed

.github/CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ Thank you for your interest in contributing to Genesis! We welcome contributions
6161
- (Optional) You can run CI tests locally to ensure you pass the online CI checks.
6262

6363
```python
64-
python -m unittest discover tests
64+
pytest -v --forked -m required ./tests
6565
```
6666

6767
- In the title of your Pull Request, please include [BUG FIX], [FEATURE] or [MISC] to indicate the purpose.

.github/workflows/linux-gpu.yml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ jobs:
3535
3636
mkdir -p "${HOME}/.cache"
3737
38+
# Prefer idle nodes if any
39+
IDLE_NODES=$(sinfo -h -o "%N %t" | awk '$2 == "idle" {print $1}')
40+
if [[ -n "$IDLE_NODES" ]]; then
41+
NODELIST="--nodelist=$IDLE_NODES"
42+
fi
43+
3844
srun \
3945
--container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \
4046
--container-mounts=\
@@ -44,7 +50,7 @@ jobs:
4450
--export=\
4551
HF_TOKEN="${HF_TOKEN}",\
4652
NVIDIA_DRIVER_CAPABILITIES=all \
47-
--partition=hpc-mid --nodes=1 --gpus=1 --time="${TIMEOUT_MINUTES}" \
53+
--partition=hpc-mid ${NODELIST} --nodes=1 --time="${TIMEOUT_MINUTES}" \
4854
--job-name=${SLURM_JOB_NAME} \
4955
bash -c "
5056
pip install -e '.[dev,render]' && \
@@ -69,16 +75,16 @@ jobs:
6975
"${{ github.workspace }}":/root/workspace \
7076
--no-container-mount-home --container-workdir=/root/workspace \
7177
--export=${SLURM_ENV_VARS} \
72-
--partition=hpc-mid --exclusive --nodes=1 --gpus=1 --time="${TIMEOUT_MINUTES}" \
78+
--partition=hpc-mid --exclusive --nodes=1 --time="${TIMEOUT_MINUTES}" \
7379
--job-name=${SLURM_JOB_NAME} \
7480
bash -c "
7581
: # sudo apt install -y tmate && \
7682
tmate -S /tmp/tmate.sock new-session -d && \
7783
tmate -S /tmp/tmate.sock wait tmate-ready && \
7884
tmate -S /tmp/tmate.sock display -p '#{tmate_ssh}'
7985
pip install -e '.[dev,render]' && \
80-
pytest --print -x -m 'benchmarks' --backend gpu ./tests && \
81-
cp 'speed_test.txt' '/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt'
86+
pytest --print -x -m 'benchmarks' ./tests && \
87+
cat speed_test*.txt > '/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt'
8288
: # tmate -S /tmp/tmate.sock wait tmate-exit
8389
"
8490

examples/drone/hover_env.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def step(self, actions):
134134
self.drone.set_propellels_rpm((1 + exec_actions * 0.8) * 14468.429183500699)
135135
# update target pos
136136
if self.target is not None:
137-
self.target.set_pos(self.commands, zero_velocity=True, envs_idx=list(range(self.num_envs)))
137+
self.target.set_pos(self.commands, zero_velocity=True)
138138
self.scene.step()
139139

140140
# update buffers

genesis/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def init(
5050
backend=None,
5151
theme="dark",
5252
logger_verbose_time=False,
53+
performance_mode: bool = False, # True: compilation up to 6x slower (GJK), but runs ~1-5% faster
5354
):
5455
# Consider Genesis as initialized right away
5556
global _initialized
@@ -172,6 +173,12 @@ def init(
172173
torch.backends.cudnn.benchmark = False
173174
logger.info("Beware running Genesis in debug mode dramatically reduces runtime speed.")
174175

176+
if not performance_mode:
177+
logger.info(
178+
"Consider setting 'performance_mode=True' in production to maximise runtime speed, if significantly "
179+
"increasing compilation time is not a concern."
180+
)
181+
175182
if seed is not None:
176183
global SEED
177184
SEED = seed
@@ -197,6 +204,7 @@ def init(
197204
force_scalarize_matrix=True,
198205
# Turning off 'advanced_optimization' is causing issues on MacOS
199206
advanced_optimization=True,
207+
cfg_optimization=performance_mode,
200208
fast_math=not debug,
201209
default_ip=ti_int,
202210
default_fp=ti_float,
@@ -291,6 +299,7 @@ def _display_greeting(INFO_length):
291299
wave_width = max(0, min(38, wave_width))
292300
bar_width = wave_width * 2 + 9
293301
wave = ("┈┉" * wave_width)[:wave_width]
302+
global logger
294303
logger.info(f"~<╭{'─'*(bar_width)}╮>~")
295304
logger.info(f"~<│{wave}>~ ~~~~<Genesis>~~~~ ~<{wave}│>~")
296305
logger.info(f"~<╰{'─'*(bar_width)}╯>~")
@@ -314,9 +323,10 @@ def _custom_excepthook(exctype, value, tb):
314323
print("".join(traceback.format_exception(exctype, value, tb)))
315324

316325
# Logger the exception right before exit if possible
326+
global logger
317327
try:
318328
logger.error(f"{exctype.__name__}: {value}")
319-
except AttributeError:
329+
except (AttributeError, NameError):
320330
# Logger may not be configured at this point
321331
pass
322332

genesis/engine/bvh.py

Lines changed: 73 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import genesis as gs
22
import taichi as ti
33
from genesis.repr_base import RBC
4+
import numpy as np
45

56

67
@ti.data_oriented
@@ -157,19 +158,27 @@ class Node:
157158
# Nodes of the BVH, first n_aabbs - 1 are internal nodes, last n_aabbs are leaf nodes
158159
self.nodes = self.Node.field(shape=(self.n_batches, self.n_aabbs * 2 - 1))
159160
# Whether an internal node has been visited during traversal
160-
self.internal_node_visited = ti.field(ti.u8, shape=(self.n_batches, self.n_aabbs - 1))
161+
self.internal_node_active = ti.field(ti.u1, shape=(self.n_batches, self.n_aabbs - 1))
162+
self.internal_node_ready = ti.field(ti.u1, shape=(self.n_batches, self.n_aabbs - 1))
163+
self.updated = ti.field(ti.u1, shape=())
161164

162165
# Query results, vec3 of batch id, self id, query id
163166
self.query_result = ti.field(gs.ti_ivec3, shape=(self.max_n_query_results))
164167
# Count of query results
165168
self.query_result_count = ti.field(ti.i32, shape=())
166169

167-
@ti.kernel
168170
def build(self):
169171
"""
170172
Build the BVH from the axis-aligned bounding boxes (AABBs).
171173
"""
174+
self.compute_aabb_centers_and_scales()
175+
self.compute_morton_codes()
176+
self.radix_sort_morton_codes()
177+
self.build_radix_tree()
178+
self.compute_bounds()
172179

180+
@ti.kernel
181+
def compute_aabb_centers_and_scales(self):
173182
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
174183
self.aabb_centers[i_b, i_a] = (self.aabbs[i_b, i_a].min + self.aabbs[i_b, i_a].max) / 2
175184

@@ -184,14 +193,9 @@ def build(self):
184193
for i_b in ti.ndrange(self.n_batches):
185194
scale = self.aabb_max[i_b] - self.aabb_min[i_b]
186195
for i in ti.static(range(3)):
187-
self.scale[i_b][i] = ti.select(scale[i] > 1e-7, 1.0 / scale[i], 1)
196+
self.scale[i_b][i] = ti.select(scale[i] > gs.EPS, 1.0 / scale[i], 1.0)
188197

189-
self.compute_morton_codes()
190-
self.radix_sort_morton_codes()
191-
self.build_radix_tree()
192-
self.compute_bounds()
193-
194-
@ti.func
198+
@ti.kernel
195199
def compute_morton_codes(self):
196200
"""
197201
Compute the Morton codes for each AABB.
@@ -223,38 +227,43 @@ def expand_bits(self, v):
223227
v = (v * ti.u32(0x00000005)) & ti.u32(0x49249249)
224228
return v
225229

226-
@ti.func
227230
def radix_sort_morton_codes(self):
228231
"""
229232
Radix sort the morton codes, using 8 bits at a time.
230233
"""
231-
for i in ti.static(range(8)):
232-
# Clear histogram
233-
for i_b, j in ti.ndrange(self.n_batches, 256):
234-
self.hist[i_b, j] = 0
234+
for i in range(8):
235+
self._kernel_radix_sort_morton_codes_one_round(i)
235236

236-
# Fill histogram
237-
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
237+
@ti.kernel
238+
def _kernel_radix_sort_morton_codes_one_round(self, i: int):
239+
# Clear histogram
240+
self.hist.fill(0)
241+
242+
# Fill histogram
243+
for i_b in range(self.n_batches):
244+
# This is now sequential
245+
# TODO Parallelize, need to use groups to handle data to remain stable, could be not worth it
246+
for i_a in range(self.n_aabbs):
238247
code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF
239248
self.offset[i_b, i_a] = ti.atomic_add(self.hist[i_b, ti.i32(code)], 1)
240249

241-
# Compute prefix sum
242-
for i_b in ti.ndrange(self.n_batches):
243-
self.prefix_sum[i_b, 0] = 0
244-
for j in range(1, 256): # sequential prefix sum
245-
self.prefix_sum[i_b, j] = self.prefix_sum[i_b, j - 1] + self.hist[i_b, j - 1]
250+
# Compute prefix sum
251+
for i_b in ti.ndrange(self.n_batches):
252+
self.prefix_sum[i_b, 0] = 0
253+
for j in range(1, 256): # sequential prefix sum
254+
self.prefix_sum[i_b, j] = self.prefix_sum[i_b, j - 1] + self.hist[i_b, j - 1]
246255

247-
# Reorder morton codes
248-
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
249-
code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF
250-
idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, ti.i32(code)])
251-
self.tmp_morton_codes[i_b, idx] = self.morton_codes[i_b, i_a]
256+
# Reorder morton codes
257+
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
258+
code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF
259+
idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, ti.i32(code)])
260+
self.tmp_morton_codes[i_b, idx] = self.morton_codes[i_b, i_a]
252261

253-
# Swap the temporary and original morton codes
254-
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
255-
self.morton_codes[i_b, i_a] = self.tmp_morton_codes[i_b, i_a]
262+
# Swap the temporary and original morton codes
263+
for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs):
264+
self.morton_codes[i_b, i_a] = self.tmp_morton_codes[i_b, i_a]
256265

257-
@ti.func
266+
@ti.kernel
258267
def build_radix_tree(self):
259268
"""
260269
Build the radix tree from the sorted morton codes.
@@ -321,31 +330,51 @@ def delta(self, i, j, i_b):
321330
break
322331
return result
323332

324-
@ti.func
325333
def compute_bounds(self):
326334
"""
327335
Compute the bounds of the BVH nodes.
328336
329-
Starts from the leaf nodes and works upwards.
337+
Starts from the leaf nodes and works upwards layer by layer.
330338
"""
331-
for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1):
332-
self.internal_node_visited[i_b, i] = ti.u8(0)
339+
self._kernel_compute_bounds_init()
340+
while self.updated[None]:
341+
self._kernel_compute_bounds_one_layer()
342+
343+
@ti.kernel
344+
def _kernel_compute_bounds_init(self):
345+
self.updated[None] = True
346+
self.internal_node_active.fill(0)
347+
self.internal_node_ready.fill(0)
333348

334349
for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs):
335350
idx = ti.i32(self.morton_codes[i_b, i])
336351
self.nodes[i_b, i + self.n_aabbs - 1].bound.min = self.aabbs[i_b, idx].min
337352
self.nodes[i_b, i + self.n_aabbs - 1].bound.max = self.aabbs[i_b, idx].max
353+
parent_idx = self.nodes[i_b, i + self.n_aabbs - 1].parent
354+
if parent_idx != -1:
355+
self.internal_node_active[i_b, parent_idx] = 1
338356

339-
cur_idx = self.nodes[i_b, i + self.n_aabbs - 1].parent
340-
while cur_idx != -1:
341-
visited = ti.u1(ti.atomic_or(self.internal_node_visited[i_b, cur_idx], ti.u8(1)))
342-
if not visited:
343-
break
344-
left_bound = self.nodes[i_b, self.nodes[i_b, cur_idx].left].bound
345-
right_bound = self.nodes[i_b, self.nodes[i_b, cur_idx].right].bound
346-
self.nodes[i_b, cur_idx].bound.min = ti.min(left_bound.min, right_bound.min)
347-
self.nodes[i_b, cur_idx].bound.max = ti.max(left_bound.max, right_bound.max)
348-
cur_idx = self.nodes[i_b, cur_idx].parent
357+
@ti.kernel
358+
def _kernel_compute_bounds_one_layer(self):
359+
self.updated[None] = False
360+
for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1):
361+
if self.internal_node_active[i_b, i] == 0:
362+
continue
363+
left_bound = self.nodes[i_b, self.nodes[i_b, i].left].bound
364+
right_bound = self.nodes[i_b, self.nodes[i_b, i].right].bound
365+
self.nodes[i_b, i].bound.min = ti.min(left_bound.min, right_bound.min)
366+
self.nodes[i_b, i].bound.max = ti.max(left_bound.max, right_bound.max)
367+
parent_idx = self.nodes[i_b, i].parent
368+
if parent_idx != -1:
369+
self.internal_node_ready[i_b, parent_idx] = 1
370+
self.internal_node_active[i_b, i] = 0
371+
self.updated[None] = True
372+
373+
for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1):
374+
if self.internal_node_ready[i_b, i] == 0:
375+
continue
376+
self.internal_node_active[i_b, i] = 1
377+
self.internal_node_ready[i_b, i] = 0
349378

350379
@ti.kernel
351380
def query(self, aabbs: ti.template()):

genesis/engine/coupler.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,7 @@ def __init__(
649649
self._n_linesearch_iterations = options.n_linesearch_iterations
650650
self._linesearch_c = options.linesearch_c
651651
self._linesearch_tau = options.linesearch_tau
652+
self.default_deformable_g = 1.0e8 # default deformable geometry size
652653

653654
def build(self) -> None:
654655
self._B = self.sim._B
@@ -698,6 +699,7 @@ def init_fem_fields(self):
698699
self.max_fem_floor_contact_pairs = fem_solver.n_surfaces * fem_solver._B
699700
self.n_fem_floor_contact_pairs = ti.field(gs.ti_int, shape=())
700701
self.fem_floor_contact_pairs = self.fem_floor_contact_pair_type.field(shape=(self.max_fem_floor_contact_pairs,))
702+
701703
# Lookup table for marching tetrahedra edges
702704
kMarchingTetsEdgeTable_np = np.array(
703705
[
@@ -934,15 +936,12 @@ def fem_floor_detection(self, f: ti.i32):
934936
)
935937
self.fem_floor_contact_pairs[i_c].barycentric = barycentric
936938

937-
C = ti.static(1.0e8)
938-
deformable_g = C
939939
rigid_g = self.fem_pressure_gradient[i_b, i_e].z
940940
# TODO A better way to handle corner cases where pressure and pressure gradient are ill defined
941941
if total_area < gs.EPS or rigid_g < gs.EPS:
942942
self.fem_floor_contact_pairs[i_c].active = 0
943943
continue
944-
g = 1.0 / (1.0 / deformable_g + 1.0 / rigid_g) # harmonic average
945-
deformable_k = total_area * C
944+
g = self.default_deformable_g * rigid_g / (self.default_deformable_g + rigid_g) # harmonic average
946945
rigid_k = total_area * g
947946
rigid_phi0 = -pressure / g
948947
rigid_fn0 = total_area * pressure

genesis/engine/entities/hybrid_entity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,8 @@ def _kernel_update_soft_part_mpm(self, f: ti.i32):
431431
acc = vel_d / dt_for_rigid_acc
432432
frc_vel = mass_real * acc
433433
frc_ang = (x_pos - link.COM).cross(frc_vel)
434-
self._solver_rigid.links_state[link_idx, i_b].cfrc_ext_vel += frc_vel
435-
self._solver_rigid.links_state[link_idx, i_b].cfrc_ext_ang += frc_ang
434+
self._solver_rigid.links_state[link_idx, i_b].cfrc_applied_vel += frc_vel
435+
self._solver_rigid.links_state[link_idx, i_b].cfrc_applied_ang += frc_ang
436436

437437
# rigid-to-soft coupling # NOTE: this may lead to unstable feedback loop
438438
self._solver_soft.particles[f_, i_global, i_b].vel += vel_d * self.material.soft_dv_coef

genesis/engine/entities/rigid_entity/rigid_entity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2748,7 +2748,7 @@ def get_contacts(self, with_entity=None, exclude_self_contact=False):
27482748
if self._solver.n_envs == 0:
27492749
contacts_info = {key: value[valid_mask] for key, value in contacts_info.items()}
27502750
else:
2751-
contacts_info = {key: value[:, valid_mask] for key, value in contacts_info.items()}
2751+
contacts_info["valid_mask"] = valid_mask
27522752

27532753
contacts_info["force_a"] = -contacts_info["force"]
27542754
contacts_info["force_b"] = +contacts_info["force"]

genesis/engine/materials/FEM/elastic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def build_linear_corotated(self, fem_solver):
8282

8383
@ti.func
8484
def pre_compute_linear_corotated(self, J, F, i_e, i_b):
85+
# Computing Polar Decomposition instead of calling `R, P = ti.polar_decompose(F)` since `P` is not needed here
8586
U, S, V = ti.svd(F)
8687
R = U @ V.transpose()
8788
self.R[i_b, i_e] = R
@@ -431,7 +432,7 @@ def compute_energy_gradient_hessian_linear_corotated(self, mu, lam, J, F, actu,
431432
for i, k in ti.static(ti.ndrange(3, 3)):
432433
hessian_field[i_b, i, i, i_e][k, k] = mu
433434

434-
for i, j, alpha, beta in ti.static(ti.ndrange(3, 3, 3, 3)):
435+
for i, j, alpha, beta in ti.ndrange(3, 3, 3, 3):
435436
hessian_field[i_b, j, beta, i_e][i, alpha] += mu * R[i, beta] * R[alpha, j] + lam * R[alpha, beta] * R[i, j]
436437

437438
return energy, gradient

0 commit comments

Comments
 (0)