 import gc
 import os
+import re
 import sys
 from enum import Enum
 
@@ -30,11 +31,23 @@ def pytest_make_parametrize_id(config, val, argname):
 
 @pytest.hookimpl(tryfirst=True)
 def pytest_cmdline_main(config: pytest.Config) -> None:
-    # Force disabling distributed framework if benchmarks are selected
+    # Make sure that benchmarks are running on GPU and that the number of workers is valid
     expr = Expression.compile(config.option.markexpr)
     is_benchmarks = expr.evaluate(MarkMatcher.from_markers((pytest.mark.benchmarks,)))
     if is_benchmarks:
-        config.option.numprocesses = 0
+        # Make sure that the GPU backend is enforced
+        backend = config.getoption("--backend")
+        if backend == "cpu":
+            raise ValueError("Running benchmarks on CPU is not supported.")
+        config.option.backend = "gpu"
+
+        # Make sure that the number of workers is not too large
+        if isinstance(config.option.numprocesses, int):
+            max_workers = max(pytest_xdist_auto_num_workers(config), 1)
+            if config.option.numprocesses > max_workers:
+                raise ValueError(
+                    f"The number of workers for running benchmarks cannot exceed '{max_workers}' on this machine."
+                )
 
     # Force disabling forked for non-linux systems
     if not sys.platform.startswith("linux"):
@@ -45,21 +58,40 @@ def pytest_cmdline_main(config: pytest.Config) -> None:
     if show_viewer:
         config.option.numprocesses = 0
 
-    # Disable low-level parallelization if distributed framework is enabled
-    # if config.option.numprocesses > 0:
-    os.environ["OMP_NUM_THREADS"] = "1"
-    os.environ["OPENBLAS_NUM_THREADS"] = "1"
-    os.environ["MKL_NUM_THREADS"] = "1"
-    os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
-    os.environ["NUMEXPR_NUM_THREADS"] = "1"
-    os.environ["NUMBA_NUM_THREADS"] = "1"
+    # Disable low-level parallelization if the distributed framework is enabled.
+    # FIXME: It should be set to `max(int(physical_core_count / num_workers), 1)`, but 'num_workers' may be unknown.
+    if not is_benchmarks and config.option.numprocesses != 0:
+        os.environ["OMP_NUM_THREADS"] = "1"
+        os.environ["OPENBLAS_NUM_THREADS"] = "1"
+        os.environ["MKL_NUM_THREADS"] = "1"
+        os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
+        os.environ["NUMEXPR_NUM_THREADS"] = "1"
+        os.environ["NUMBA_NUM_THREADS"] = "1"
+
+
+def _get_gpu_indices():
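+    # Prefer an explicit CUDA_VISIBLE_DEVICES restriction if one has been set by the user.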
+    nvidia_gpu_indices = os.environ.get("CUDA_VISIBLE_DEVICES")
+    if nvidia_gpu_indices is not None:
+        return tuple(sorted(map(int, nvidia_gpu_indices.split(","))))
+
+    if sys.platform == "linux":
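+        # Fall back to enumerating NVIDIA GPUs through the driver's procfs interface.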
+        nvidia_gpu_indices = []
+        nvidia_gpu_interface_path = "/proc/driver/nvidia/gpus/"
+        if os.path.exists(nvidia_gpu_interface_path):
+            for device_path in os.listdir(nvidia_gpu_interface_path):
+                with open(os.path.join(nvidia_gpu_interface_path, device_path, "information"), "r") as f:
+                    gpu_id = int(re.search(r"Device Minor:\s+(\d+)", f.read()).group(1))
+                    nvidia_gpu_indices.append(gpu_id)
+        return tuple(sorted(nvidia_gpu_indices))
+
+    return (0,)
 
 
 def pytest_xdist_auto_num_workers(config):
-    # Get available memory (RAM & VRAM) and number of physical cores.
     import psutil
     import genesis as gs
 
+    # Get available memory (RAM & VRAM) and number of physical cores.
     physical_core_count = psutil.cpu_count(logical=False)
     _, _, ram_memory, _ = gs.utils.get_device(gs.cpu)
     _, _, vram_memory, _ = gs.utils.get_device(gs.gpu)
@@ -76,15 +108,39 @@ def pytest_xdist_auto_num_workers(config):
     else:
         ram_memory_per_worker = 7.5
         vram_memory_per_worker = 1.6
-    return min(
-        int(ram_memory / ram_memory_per_worker),
-        int(vram_memory / vram_memory_per_worker),
+    num_workers = min(
         physical_core_count,
+        max(int(ram_memory / ram_memory_per_worker), 1),
+        max(int(vram_memory / vram_memory_per_worker), 1),
     )
 
+    # Special treatment for benchmarks
+    expr = Expression.compile(config.option.markexpr)
+    is_benchmarks = expr.evaluate(MarkMatcher.from_markers((pytest.mark.benchmarks,)))
+    if is_benchmarks:
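+        # Cap benchmark workers at one per GPU, with a few physical cores reserved for each.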
+        num_cpu_per_gpu = 4
+        num_workers = min(
+            num_workers,
+            len(_get_gpu_indices()),
+            max(int(physical_core_count / num_cpu_per_gpu), 1),
+        )
+
+    return num_workers
+
+
+def pytest_runtest_setup(item):
+    # Enforce GPU affinity when the distributed framework is enabled
+    worker_id = os.environ.get("PYTEST_XDIST_WORKER")
+    if worker_id and worker_id.startswith("gw"):
+        worker_num = int(worker_id[2:])
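+        # Assign xdist workers to the available GPUs in round-robin order.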
+        gpu_indices = _get_gpu_indices()
+        gpu_num = worker_num % len(gpu_indices)
+        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_indices[gpu_num])
+        os.environ["TI_VISIBLE_DEVICE"] = str(gpu_indices[gpu_num])
+
 
 def pytest_addoption(parser):
-    parser.addoption("--backend", action="store", default="cpu", help="Default simulation backend.")
+    parser.addoption("--backend", action="store", default=None, help="Default simulation backend.")
     parser.addoption("--vis", action="store_true", default=False, help="Enable interactive viewer.")
 
 
@@ -97,7 +153,7 @@ def show_viewer(pytestconfig):
 def backend(pytestconfig):
     import genesis as gs
 
-    backend = pytestconfig.getoption("--backend", "cpu")
+    backend = pytestconfig.getoption("--backend") or gs.cpu
     if isinstance(backend, str):
         return getattr(gs.constants.backend, backend)
     return backend
@@ -220,6 +276,7 @@ def initialize_genesis(request, backend, taichi_offline_cache):
     else:
         precision = "32"
         debug = False
+
     try:
         if not taichi_offline_cache:
             os.environ["TI_OFFLINE_CACHE"] = "0"