Merge pull request #150 from ichumuh/fast_vibes

ichumuh · web-flow · commit 2cb6e9c127a4 · 2025-12-01T11:11:28.000+01:00
Faster state synchronization
diff --git a/src/semantic_digital_twin/adapters/ros/world_synchronizer.py b/src/semantic_digital_twin/adapters/ros/world_synchronizer.py
@@ -229,17 +229,28 @@ def world_callback(self):
         self.publish(msg)
 
     def compute_state_changes(self) -> Dict[UUID, float]:
-        changes = {
-            _id: current_state
-            for _id, current_state in zip(
-                self.world.state.keys(), self.world.state.positions
-            )
-            if _id not in self.previous_world_state_data
-            or not np.allclose(
-                current_state, self.previous_world_state_data[_id].position
-            )
-        }
-        return changes
+        """
+        Compute and return only the position changes since the last published snapshot.
+
+        Returns a mapping of DOF id to current position for entries whose position
+        differs from the previous snapshot, using a vectorized tolerance-based diff.
+        """
+        ids = self.world.state.keys()  # DOF ids (UUID) in column order
+        curr = self.world.state.positions  # np.ndarray shape (N,)
+        prev = self.previous_world_state_data  # np.ndarray shape (N,)
+
+        # If the number of DOFs changed (model update), send everything once
+        # so the other side can resync, then the snapshot will be updated afterward.
+        if prev.shape != curr.shape:
+            return {n: float(v) for n, v in zip(ids, curr)}
+
+        # Vectorized comparison: O(N) with minimal Python overhead
+        changed_mask = ~np.isclose(curr, prev, rtol=1e-8, atol=1e-12, equal_nan=True)
+        if not np.any(changed_mask):
+            return {}
+
+        idx = np.nonzero(changed_mask)[0]
+        return {ids[i]: float(curr[i]) for i in idx}
 
 
 @dataclass
@@ -259,10 +270,15 @@ def __post_init__(self):
         SynchronizerOnCallback.__post_init__(self)
 
     def apply_message(self, msg: ModificationBlock):
-        for callback in self.world.state.state_change_callbacks:
+        running_callbacks = [
+            callback
+            for callback in self.world.state.state_change_callbacks
+            if not callback._is_paused
+        ]
+        for callback in running_callbacks:
             callback.pause()
         msg.modifications.apply(self.world)
-        for callback in self.world.state.state_change_callbacks:
+        for callback in running_callbacks:
             callback.resume()
 
     def world_callback(self):
diff --git a/src/semantic_digital_twin/spatial_types/spatial_types.py b/src/semantic_digital_twin/spatial_types/spatial_types.py
@@ -240,9 +240,23 @@ def _setup_constant_result(self) -> None:
         self._function_evaluator()
         self._is_constant = True
 
+    def bind_args_to_memory_view(self, arg_idx: int, numpy_array: np.ndarray) -> None:
+        """
+        Binds the arg at index arg_idx to the memoryview of a numpy_array.
+        If your args keep the same memory across calls, you only need to bind them once.
+        """
+        self._function_buffer.set_arg(arg_idx, memoryview(numpy_array))
+
+    def evaluate(self) -> Union[np.ndarray, sp.csc_matrix]:
+        """
+        Evaluate the compiled function with the current args.
+        """
+        self._function_evaluator()
+        return self._out
+
     def __call__(self, *args: np.ndarray) -> Union[np.ndarray, sp.csc_matrix]:
         """
-        Efficiently evaluate the compiled function with positional arguments, by directly writing the memory of the
+        Efficiently evaluate the compiled function with positional arguments by directly writing the memory of the
         numpy arrays to the memoryview of the compiled function.
         Similarly, the result will be written to the output buffer and doesn't allocate new memory on each eval.
 
@@ -262,9 +276,8 @@ def __call__(self, *args: np.ndarray) -> Union[np.ndarray, sp.csc_matrix]:
                 actual_number_of_args,
             )
         for arg_idx, arg in enumerate(args):
-            self._function_buffer.set_arg(arg_idx, memoryview(arg))
-        self._function_evaluator()
-        return self._out
+            self.bind_args_to_memory_view(arg_idx, arg)
+        return self.evaluate()
 
     def call_with_kwargs(self, **kwargs: float) -> np.ndarray:
         """
@@ -1183,6 +1196,10 @@ def fmod(a: ScalarData, b: ScalarData) -> Expression:
     return Expression(ca.fmod(a, b))
 
 
+def sum(*expressions: ScalarData) -> Expression:
+    return Expression(ca.sum(to_sx(Expression(expressions))))
+
+
 def normalize_angle_positive(angle: ScalarData) -> Expression:
     """
     Normalizes the angle to be 0 to 2*pi
diff --git a/test/test_casadi/test_casadi_wrapper.py b/test/test_casadi/test_casadi_wrapper.py
@@ -3261,6 +3261,56 @@ def test_stacked_compiled_function_dense(self):
         assert_allclose(actual_e1, expected_e1)
         assert_allclose(actual_e2, expected_e2)
 
+    def test_single_args(self):
+        size = 10_000
+        variables = cas.create_float_variables([str(i) for i in range(size)])
+        expr = cas.sum(*variables)
+        f = expr.compile()
+        for i in range(10):
+            data = np.random.rand(size)
+            assert np.isclose(f(data), np.sum(data))
+
+    def test_single_args_with_bind(self):
+        size = 10_000
+        data = np.random.rand(size)
+        variables = cas.create_float_variables([str(i) for i in range(size)])
+        expr = cas.sum(*variables)
+        f = expr.compile()
+        f.bind_args_to_memory_view(0, data)
+        for i in range(10):
+            np.copyto(data, np.random.rand(size))
+            assert np.isclose(f.evaluate(), np.sum(data))
+
+    def test_multiple_args(self):
+        size = 10_000
+        n = 10
+        element_size = size // n
+        variables = cas.create_float_variables([str(i) for i in range(size)])
+        expr = cas.sum(*variables)
+        args = [variables[i * element_size : (i + 1) * element_size] for i in range(n)]
+        f = expr.compile(parameters=args)
+        for i in range(100):
+            args_values = [np.ones(element_size)] * n
+            assert f(*args_values) == size
+
+    def test_multiple_args_with_bind(self):
+        size = 10_000
+        n = 10
+        element_size = size // n
+        variables = cas.create_float_variables([str(i) for i in range(size)])
+        expr = cas.sum(*variables)
+        args = [variables[i * element_size : (i + 1) * element_size] for i in range(n)]
+        f = expr.compile(parameters=args)
+
+        datas = []
+        for i in range(n):
+            datas.append(np.random.rand(element_size))
+            f.bind_args_to_memory_view(i, datas[i])
+        for i in range(100):
+            for i in range(n):
+                datas[i][:] = np.random.rand(element_size)
+            assert np.isclose(f.evaluate(), np.sum(datas))
+
     def test_missing_free_variables(self):
         s1, s2 = cas.create_float_variables(["s1", "s2"])
         e = cas.sqrt(cas.cos(s1) + cas.sin(s2))
diff --git a/test/test_ros/test_world_synchronizer.py b/test/test_ros/test_world_synchronizer.py
@@ -4,6 +4,7 @@
 import unittest
 import uuid
 from typing import Optional
+from uuid import UUID, uuid4
 
 import numpy as np
 import sqlalchemy
@@ -51,17 +52,37 @@ def deterministic_uuid(seed: str) -> uuid.UUID:
         w.add_degree_of_freedom(y_dof)
         z_dof = DegreeOfFreedom(name=PrefixedName("z"), id=deterministic_uuid("z_dof"))
         w.add_degree_of_freedom(z_dof)
-        qx_dof = DegreeOfFreedom(name=PrefixedName("qx"), id=deterministic_uuid("qx_dof"))
+        qx_dof = DegreeOfFreedom(
+            name=PrefixedName("qx"), id=deterministic_uuid("qx_dof")
+        )
         w.add_degree_of_freedom(qx_dof)
-        qy_dof = DegreeOfFreedom(name=PrefixedName("qy"), id=deterministic_uuid("qy_dof"))
+        qy_dof = DegreeOfFreedom(
+            name=PrefixedName("qy"), id=deterministic_uuid("qy_dof")
+        )
         w.add_degree_of_freedom(qy_dof)
-        qz_dof = DegreeOfFreedom(name=PrefixedName("qz"), id=deterministic_uuid("qz_dof"))
+        qz_dof = DegreeOfFreedom(
+            name=PrefixedName("qz"), id=deterministic_uuid("qz_dof")
+        )
         w.add_degree_of_freedom(qz_dof)
-        qw_dof = DegreeOfFreedom(name=PrefixedName("qw"), id=deterministic_uuid("qw_dof"))
+        qw_dof = DegreeOfFreedom(
+            name=PrefixedName("qw"), id=deterministic_uuid("qw_dof")
+        )
         w.add_degree_of_freedom(qw_dof)
         w.state[qw_dof.id].position = 1.0
 
-        w.add_connection(Connection6DoF(parent=b1, child=b2, x_id=x_dof.id, y_id=y_dof.id, z_id=z_dof.id, qx_id=qx_dof.id, qy_id=qy_dof.id, qz_id=qz_dof.id, qw_id=qw_dof.id))
+        w.add_connection(
+            Connection6DoF(
+                parent=b1,
+                child=b2,
+                x_id=x_dof.id,
+                y_id=y_dof.id,
+                z_id=z_dof.id,
+                qx_id=qx_dof.id,
+                qy_id=qy_dof.id,
+                qz_id=qz_dof.id,
+                qw_id=qw_dof.id,
+            )
+        )
     return w
 
 
@@ -428,5 +449,49 @@ def test_synchronize_6dof(rclpy_node):
     np.testing.assert_array_almost_equal(w1.state.data, w2.state.data)
 
 
+def test_compute_state_changes_no_changes(rclpy_node):
+    w = create_dummy_world()
+    s = StateSynchronizer(node=rclpy_node, world=w)
+    # Immediately compare without changing state
+    changes = s.compute_state_changes()
+    assert changes == {}
+    s.close()
+
+
+def test_compute_state_changes_single_change(rclpy_node):
+    w = create_dummy_world()
+    s = StateSynchronizer(node=rclpy_node, world=w)
+    # change first position
+    w.state.data[0, 0] += 1e-3
+    changes = s.compute_state_changes()
+    names = w.state.keys()
+    assert list(changes.keys()) == [names[0]]
+    assert np.isclose(changes[names[0]], w.state.positions[0])
+    s.close()
+
+
+def test_compute_state_changes_shape_change_full_snapshot(rclpy_node):
+    w = create_dummy_world()
+    s = StateSynchronizer(node=rclpy_node, world=w)
+    # append a new DOF by registering a fresh UUID in the state
+    new_uuid = uuid4()
+    w.state._add_dof(new_uuid)
+    w.state[new_uuid] = np.zeros(4)
+    changes = s.compute_state_changes()
+    # full snapshot expected
+    assert len(changes) == len(w.state)
+    s.close()
+
+
+def test_compute_state_changes_nan_handling(rclpy_node):
+    w = create_dummy_world()
+    s = StateSynchronizer(node=rclpy_node, world=w)
+    # set both previous and current to NaN for entry 0
+    w.state.data[0, 0] = np.nan
+    s.previous_world_state_data[0] = np.nan
+    assert s.compute_state_changes() == {}
+    s.close()
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/test_worlds/test_world.py b/test/test_worlds/test_world.py
@@ -221,8 +221,10 @@ def test_compute_fk(world_setup):
 
     connection: PrismaticConnection = world.get_connection(r1, r2)
 
+    state_memory_id = id(world.state.data)
     world.state[connection.dof.id].position = 1.0
     world.notify_state_change()
+    assert state_memory_id == id(world.state.data)
     fk = world.compute_forward_kinematics_np(l2, r2)
     assert np.allclose(
         fk,
@@ -269,6 +271,7 @@ def test_compute_fk_expression(world_setup):
 
 def test_apply_control_commands(world_setup):
     world, l1, l2, bf, r1, r2 = world_setup
+    state_memory_id = id(world.state.data)
     connection: PrismaticConnection = world.get_connection(r1, r2)
     cmd = np.array([100.0, 0, 0, 0, 0, 0, 0, 0])
     dt = 0.1
@@ -277,6 +280,8 @@ def test_apply_control_commands(world_setup):
     assert world.state[connection.dof.id].acceleration == 100.0 * dt
     assert world.state[connection.dof.id].velocity == 100.0 * dt * dt
     assert world.state[connection.dof.id].position == 100.0 * dt * dt * dt
+    # the state should reuse the same memory
+    assert state_memory_id == id(world.state.data)
 
 
 def test_compute_relative_pose(world_setup):