[MISC] Fix unit tests failing on GPU. (Genesis-Embodied-AI#1580)

duburcqa · web-flow · commit 60fd1306f0fe · 2025-08-16T21:05:16.000+02:00
diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml
@@ -67,7 +67,7 @@ jobs:
             bash -c "
               pip install --extra-index-url https://pypi.nvidia.com/ omniverse-kit && \
               pip install -e '.[dev,render,usd]' && \
-              pytest -v --dev --forked ./tests
+              pytest -v --backend gpu --dev --forked ./tests
             "
 
       - name: Run benchmarks
diff --git a/genesis/sensors/imu.py b/genesis/sensors/imu.py
@@ -70,10 +70,10 @@ class IMUSharedMetadata(SharedSensorMetadata):
 
     solver: RigidSolver | None = None
     links_idx: list[int] = field(default_factory=list)
-    offsets_pos: torch.Tensor = torch.tensor([])
-    offsets_quat: torch.Tensor = torch.tensor([])
-    acc_bias: torch.Tensor = torch.tensor([])
-    ang_bias: torch.Tensor = torch.tensor([])
+    offsets_pos: torch.Tensor = field(default_factory=lambda: torch.tensor([], dtype=gs.tc_float, device=gs.device))
+    offsets_quat: torch.Tensor = field(default_factory=lambda: torch.tensor([], dtype=gs.tc_float, device=gs.device))
+    acc_bias: torch.Tensor = field(default_factory=lambda: torch.tensor([], dtype=gs.tc_float, device=gs.device))
+    ang_bias: torch.Tensor = field(default_factory=lambda: torch.tensor([], dtype=gs.tc_float, device=gs.device))
 
 
 @register_sensor(IMUOptions, IMUSharedMetadata)
@@ -89,19 +89,28 @@ def build(self):
 
         self._shared_metadata.links_idx.append(self._options.entity_idx + self._options.link_idx_local)
         self._shared_metadata.offsets_pos = torch.cat(
-            [self._shared_metadata.offsets_pos, torch.tensor([self._options.pos_offset], dtype=gs.tc_float)]
+            [
+                self._shared_metadata.offsets_pos,
+                torch.tensor([self._options.pos_offset], dtype=gs.tc_float, device=gs.device),
+            ]
         )
 
-        quat_tensor = torch.tensor(euler_to_quat(self._options.euler_offset), dtype=gs.tc_float).unsqueeze(0)
+        quat_tensor = torch.tensor(euler_to_quat([self._options.euler_offset]), dtype=gs.tc_float, device=gs.device)
         if self._shared_metadata.solver.n_envs > 0:
             quat_tensor = quat_tensor.unsqueeze(0).expand((self._manager._sim._B, 1, 4))
         self._shared_metadata.offsets_quat = torch.cat([self._shared_metadata.offsets_quat, quat_tensor], dim=-2)
 
         self._shared_metadata.acc_bias = torch.cat(
-            [self._shared_metadata.acc_bias, torch.tensor([self._options.accelerometer_bias], dtype=gs.tc_float)]
+            [
+                self._shared_metadata.acc_bias,
+                torch.tensor([self._options.accelerometer_bias], dtype=gs.tc_float, device=gs.device),
+            ]
         )
         self._shared_metadata.ang_bias = torch.cat(
-            [self._shared_metadata.ang_bias, torch.tensor([self._options.gyroscope_bias], dtype=gs.tc_float)]
+            [
+                self._shared_metadata.ang_bias,
+                torch.tensor([self._options.gyroscope_bias], dtype=gs.tc_float, device=gs.device),
+            ]
         )
 
     def _get_return_format(self) -> dict[str, tuple[int, ...]]:
diff --git a/genesis/utils/geom.py b/genesis/utils/geom.py
@@ -493,7 +493,7 @@ def _np_xyz_to_quat(xyz: np.ndarray, rpy: bool = False, out: np.ndarray | None =
     """
     assert xyz.ndim >= 1
     if out is None:
-        out_ = np.empty((*xyz.shape[:-1], 4))
+        out_ = np.empty((*xyz.shape[:-1], 4), dtype=xyz.dtype)
     else:
         assert out.shape == (*xyz.shape[:-1], 4)
         out_ = out
@@ -532,12 +532,13 @@ def _tc_xyz_to_quat(xyz: torch.Tensor, rpy: bool = False, *, out: torch.Tensor |
 
 
 def xyz_to_quat(xyz, rpy=False, degrees=False):
-    if degrees:
-        xyz = xyz * (math.pi / 180.0)
-
     if isinstance(xyz, torch.Tensor):
+        if degrees:
+            xyz = torch.deg2rad(xyz)
         return _tc_xyz_to_quat(xyz, rpy)
     elif isinstance(xyz, np.ndarray):
+        if degrees:
+            xyz = np.deg2rad(xyz)
         return _np_xyz_to_quat(xyz, rpy)
     else:
         gs.raise_exception(f"the input must be either torch.Tensor or np.ndarray. got: {type(xyz)=}")
@@ -703,12 +704,14 @@ def _tc_quat_to_xyz(quat, rpy=False, out=None):
 def quat_to_xyz(quat, rpy=False, degrees=False):
     if isinstance(quat, torch.Tensor):
         rpy = _tc_quat_to_xyz(quat, rpy)
+        if degrees:
+            rpy = torch.rad2deg(rpy)
     elif isinstance(quat, np.ndarray):
         rpy = _np_quat_to_xyz(quat, rpy)
+        if degrees:
+            rpy = np.rad2deg(rpy)
     else:
         gs.raise_exception(f"the input must be either torch.Tensor or np.ndarray. got: {type(quat)=}")
-    if degrees:
-        rpy *= 180.0 / math.pi
     return rpy
 
 
@@ -1233,13 +1236,14 @@ def _tc_z_up_to_R(z, up=None, out=None):
 
     # Handle zero x norm cases
     zero_x_mask = x_norm[..., 0] < gs.EPS
-    if zero_x_mask.any():
+    zero_x_num = zero_x_mask.sum()
+    if zero_x_num:
         # For zero x norm, set identity matrix
-        R[zero_x_mask] = torch.eye(3, device=z.device, dtype=z.dtype)
+        R[zero_x_mask] = torch.eye(3, device=z.device, dtype=z.dtype).unsqueeze(0).expand((zero_x_num, 3, 3))
 
         # Continue with non-zero cases
         valid_mask = ~zero_x_mask
-        if valid_mask.any():
+        if zero_x_num < zero_x_mask.numel():
             z_valid = z[valid_mask]
             x_valid = x[valid_mask]
             y[valid_mask] = torch.cross(z_valid, x_valid, dim=-1)
@@ -1324,7 +1328,7 @@ def _np_euler_to_R(rpy: np.ndarray, out: np.ndarray | None = None) -> np.ndarray
 
 
 def euler_to_R(euler_xyz):
-    return _np_euler_to_R(np.asarray(euler_xyz) * (math.pi / 180.0))
+    return _np_euler_to_R(np.deg2rad(euler_xyz))
 
 
 @nb.jit(nopython=True, cache=True)
diff --git a/genesis/utils/misc.py b/genesis/utils/misc.py
@@ -10,7 +10,7 @@
 import os
 from dataclasses import dataclass
 from collections import OrderedDict
-from typing import Any, Type, NoReturn
+from typing import Any, Type, NoReturn, Optional
 
 import numpy as np
 import cpuinfo
@@ -175,12 +175,13 @@ def get_platform():
     assert False, f"Unknown platform name {name}"
 
 
-def get_device(backend: gs_backend):
+def get_device(backend: gs_backend, device_idx: Optional[int] = None):
     if backend == gs_backend.cuda:
         if not torch.cuda.is_available():
             gs.raise_exception("torch cuda not available")
 
-        device_idx = torch.cuda.current_device()
+        if device_idx is None:
+            device_idx = torch.cuda.current_device()
         device = torch.device("cuda", device_idx)
         device_property = torch.cuda.get_device_properties(device)
         device_name = device_property.name
@@ -192,13 +193,14 @@ def get_device(backend: gs_backend):
 
         # on mac, cpu and gpu are in the same device
         _, device_name, total_mem, _ = get_device(gs_backend.cpu)
-        device = torch.device("mps", 0)
+        device = torch.device("mps", device_idx)
 
     elif backend == gs_backend.vulkan:
         if torch.cuda.is_available():
             device, device_name, total_mem, _ = get_device(gs_backend.cuda)
         elif torch.xpu.is_available():  # pytorch 2.5+ supports Intel XPU device
-            device_idx = torch.xpu.current_device()
+            if device_idx is None:
+                device_idx = torch.xpu.current_device()
             device = torch.device("xpu", device_idx)
             device_property = torch.xpu.get_device_properties(device_idx)
             device_name = device_property.name
@@ -220,7 +222,7 @@ def get_device(backend: gs_backend):
     else:
         device_name = cpuinfo.get_cpu_info()["brand_raw"]
         total_mem = psutil.virtual_memory().total / 1024**3
-        device = torch.device("cpu")
+        device = torch.device("cpu", device_idx)
 
     return device, device_name, total_mem, backend
 
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -95,6 +95,8 @@ def pytest_xdist_auto_num_workers(config):
     physical_core_count = psutil.cpu_count(logical=config.option.logical)
     _, _, ram_memory, _ = gs.utils.get_device(gs.cpu)
     _, _, vram_memory, backend = gs.utils.get_device(gs.gpu)
+    num_gpus = len(_get_gpu_indices())
+    vram_memory *= num_gpus
     if backend == gs.cpu:
         # Ignore VRAM if no GPU is available
         vram_memory = float("inf")
@@ -124,7 +126,7 @@ def pytest_xdist_auto_num_workers(config):
         num_cpu_per_gpu = 4
         num_workers = min(
             num_workers,
-            len(_get_gpu_indices()),
+            num_gpus,
             max(int(physical_core_count / num_cpu_per_gpu), 1),
         )
 
diff --git a/tests/test_fem.py b/tests/test_fem.py
@@ -589,7 +589,7 @@ def test_fem_articulated(fem_material_linear_corotated_soft, show_viewer):
     assert_allclose(
         min_pos_z,
         -1.0e-3,  # FIXME: Compute desired penetration analytically
-        atol=1e-4,
+        atol=2e-4,
         err_msg=f"Sphere minimum Z position {min_pos_z} is not close to -1.0e-3.",
     )
     assert_allclose(
@@ -667,9 +667,7 @@ def test_implicit_hard_vertex_constraint(fem_material_linear_corotated, show_vie
         tol=gs.EPS,
         err_msg="Vertices should stay at initial target positions with hard constraints",
     )
-    new_target_poss = initial_target_poss + gs.tensor(
-        [[0.1, 0.1, 0.1]],
-    )
+    new_target_poss = initial_target_poss + 0.1
     cube.update_constraint_targets(verts_idx=verts_idx, target_poss=new_target_poss)
     if show_viewer:
         scene.clear_debug_object(sphere)
@@ -689,7 +687,7 @@ def test_implicit_hard_vertex_constraint(fem_material_linear_corotated, show_vie
     if show_viewer:
         scene.clear_debug_object(sphere)
 
-    for _ in range(100):
+    for _ in range(70):
         scene.step()
 
     state = cube.get_state()
@@ -703,7 +701,7 @@ def test_implicit_hard_vertex_constraint(fem_material_linear_corotated, show_vie
 
     velocity = state.vel.mean(axis=(0, 1))
     assert_allclose(
-        velocity, 0.0, atol=1e-5, err_msg=f"Cube velocity {velocity} should be close to zero after settling."
+        velocity, 0.0, atol=4e-5, err_msg=f"Cube velocity {velocity} should be close to zero after settling."
     )
 
     # The contact requires some penetration to generate enough contact force to cancel out gravity
diff --git a/tests/test_rigid_physics.py b/tests/test_rigid_physics.py
@@ -2269,7 +2269,7 @@ def test_gravity(show_viewer, tol):
 
 
 @pytest.mark.required
-@pytest.mark.parametrize("backend", [gs.cpu])
+@pytest.mark.parametrize("backend", [gs.cpu, gs.gpu])
 def test_scene_saver_franka(show_viewer, tol):
     scene1 = gs.Scene(
         show_viewer=show_viewer,
@@ -2290,7 +2290,7 @@ def test_scene_saver_franka(show_viewer, tol):
     target_pose = np.array([0.3, -0.8, 0.4, -1.6, 0.5, 1.0, -0.6, 0.03, 0.03], dtype=float)
     franka1.control_dofs_position(target_pose, dof_idx)
 
-    for _ in range(400):
+    for _ in range(100):
         scene1.step()
 
     pose_ref = franka1.get_dofs_position(dof_idx)
@@ -2307,7 +2307,7 @@ def test_scene_saver_franka(show_viewer, tol):
 
     pose_loaded = franka2.get_dofs_position(dof_idx)
 
-    assert_allclose(pose_ref, pose_loaded, tol=tol)
+    assert_allclose(pose_ref, pose_loaded, tol=2e-7)
 
 
 @pytest.mark.required
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -166,7 +166,7 @@ def test_utils_geom_taichi_vs_tensor_consistency(batch_shape):
 
 @pytest.mark.required
 @pytest.mark.parametrize("batch_shape", [(10, 40, 25), ()])
-def test_utils_geom_numpy_vs_tensor_consistency(batch_shape):
+def test_utils_geom_numpy_vs_tensor_consistency(batch_shape, tol):
     for py_func, shapes_in, shapes_out in (
         (gu.z_up_to_R, [[3], [3], [3, 3]], [[3, 3]]),
         (gu.pos_lookat_up_to_T, [[3], [3], [3]], [[4, 4]]),
@@ -194,7 +194,7 @@ def test_utils_geom_numpy_vs_tensor_consistency(batch_shape):
         tc_outs = tuple(map(tensor_to_array, tc_outs))
 
         for np_out, tc_out in zip(np_outs, tc_outs):
-            np.testing.assert_allclose(np_out, tc_out, atol=gs.EPS)
+            assert_allclose(np_out, tc_out, tol=tol)
 
 
 @pytest.mark.required

Original file line number	Diff line number	Diff line change
`@@ -67,7 +67,7 @@ jobs:`
`67`	`67`	`bash -c "`
`68`	`68`	`pip install --extra-index-url https://pypi.nvidia.com/ omniverse-kit && \`
`69`	`69`	`pip install -e '.[dev,render,usd]' && \`
`70`		`- pytest -v --dev --forked ./tests`
	`70`	`+ pytest -v --backend gpu --dev --forked ./tests`
`71`	`71`	`"`
`72`	`72`
`73`	`73`	`- name: Run benchmarks`