IMPORTANT: IT CLIMBS

ubgk · ubgk · commit 6e1d9e224837 · 2026-01-26T20:47:43.000+01:00
diff --git a/go2_simulation/bullet_wrapper.py b/go2_simulation/bullet_wrapper.py
@@ -37,16 +37,7 @@ def __init__(self, node, timestep):
         self.load_obstacles()
 
     def init_pybullet(self, timestep):
-        cid = pybullet.connect(pybullet.SHARED_MEMORY)
-        if cid < 0:
-            pybullet.connect(pybullet.GUI, options="--opengl3")
-        else:
-            pybullet.connect(pybullet.GUI)
-
-        # Load robot
-        self.robot = pybullet.loadURDF(GO2_DESCRIPTION_URDF_PATH, [0, 0, 0.4])
-        print('URDF loaded from:', GO2_DESCRIPTION_URDF_PATH)
-        self.localInertiaPos = pybullet.getDynamicsInfo(self.robot, -1)[3]
+        pybullet.connect(pybullet.GUI, options="--opengl3")
 
         # Load ground plane and other obstacles
         self.env_ids = []  # Keep track of all obstacles
@@ -56,10 +47,11 @@ def init_pybullet(self, timestep):
         self.env_ids.append(self.plane_id)
         pybullet.resetBasePositionAndOrientation(self.plane_id, [0, 0, 0], [0, 0, 0, 1])
 
-        self.ramp_id = pybullet.loadURDF(
-            os.path.join(get_package_share_directory("go2_simulation"), "data/assets/obstacles.urdf")
-        )
-        self.env_ids.append(self.ramp_id)
+        # Load robot
+        GO2_DESCRIPTION_URDF_PATH = '/home/hamlet/Workspace/reinforcement-learning/inference/assets/go2/go2.urdf'
+        self.robot = pybullet.loadURDF(GO2_DESCRIPTION_URDF_PATH, [0, 0, 0.4])
+        print('URDF loaded from:', GO2_DESCRIPTION_URDF_PATH)
+        self.localInertiaPos = pybullet.getDynamicsInfo(self.robot, -1)[3]
 
         # Set time step
         pybullet.setTimeStep(timestep)
@@ -97,7 +89,7 @@ def init_pybullet(self, timestep):
                 self.feet_idx[foot_id] = (i, link_name)
 
         # Set robot initial config on the ground
-        initial_q = [0.0, 1.00, -2.1, 0.0, 1.00, -2.1, 0, 1.00, -2.1, 0, 1.00, -2.1]
+        initial_q = [-0.1,  0.8, -1.5, 0.1,  0.8, -1.5,  -0.1,  1., -1.5, 0.1,  1., -1.5]
         for i, id in enumerate(self.j_idx):
             pybullet.resetJointState(self.robot, id, initial_q[i])
 
@@ -148,7 +140,7 @@ def load_obstacles(self):
             pybullet.GEOM_BOX, halfExtents=half_extents, rgbaColor=[1, 0, 0, 1]
         )
 
-        num_boxes = 8
+        num_boxes = 2
         for i in range(num_boxes):
             box_id = pybullet.createMultiBody(
                 baseMass=0,
@@ -178,6 +170,30 @@ def get_joint_id(self, joint_name):
                 return i
         return None  # Joint name not found
 
+
+    def get_feet_contact_states(self):
+        """Return a length-4 array of approximate per-foot contact forces.
+
+        A foot touching any body listed in ``self.env_ids`` is assigned a
+        fixed 39.4; a foot with no contact keeps 0.
+        """
+        forces = np.zeros(4)
+        for foot, _ in enumerate(self.foot_link_names):
+            for body_id in self.env_ids:
+                link_id = self.feet_idx[foot][0]
+                # Ask PyBullet for contacts between this foot link and the body
+                hits = pybullet.getContactPoints(
+                    bodyA=self.robot, bodyB=body_id, linkIndexA=link_id
+                )
+                if len(hits) == 0:
+                    continue  # Nothing touching here, try the next body
+                # roughly 1/4 of the robot mass (0th order approx)
+                forces[foot] = 39.4
+                break  # No need to check other obstacles for this foot
+
+        return forces
+
+
     def step(self, tau_cmd):
         # Set actuation
         pybullet.setJointMotorControlArray(
@@ -212,27 +228,9 @@ def step(self, tau_cmd):
         q_current = np.concatenate((np.array(linear_pose), np.array(angular_pose), joint_position))
         v_current = np.concatenate((np.array(linear_vel), np.array(angular_vel), joint_velocity))
         a_current = ((v_current - self.v_last) / self.dt) if self.v_last is not None else np.zeros(6 + 12)
-        f_current = np.zeros(4)
-
         self.v_last = v_current
 
-        for i, foot_name in enumerate(self.foot_link_names):
-            for collision_id in self.env_ids:
-                foot_link_id = self.feet_idx[i][0]
-
-                # Get contact points between foot and ground
-                contact_points = pybullet.getContactPoints(
-                    bodyA=self.robot,
-                    bodyB=collision_id,
-                    linkIndexA=foot_link_id
-                )
-
-                # Check if there are any contacts
-                is_in_contact = len(contact_points) > 0
-
-                if is_in_contact:
-                    f_current[i] = 39.4  # roughly 1/4 of the robot mass (0th order approx)
-                    break  # No need to check other obstacles for this foot
+        f_current = self.get_feet_contact_states()
 
         return q_current, v_current, a_current, f_current
 
diff --git a/go2_simulation/simulation_node.py b/go2_simulation/simulation_node.py
@@ -4,6 +4,7 @@
 from unitree_go.msg import LowState, LowCmd
 from nav_msgs.msg import Odometry
 import numpy as np
+import pybullet as pb
 from scipy.spatial.transform import Rotation as R
 
 from tf2_ros import TransformBroadcaster
@@ -14,6 +15,34 @@
 from rclpy.time import Time
 from rclpy.duration import Duration
 
+import onnxruntime as rt
+from collections import deque
+
+def euler_from_quaternion(quat_angle):
+    """
+    Convert an (x, y, z, w) quaternion into (roll, pitch, yaw) in radians.
+
+    NOTE: This was copied from extreme-parkour repo
+
+    roll is the rotation around x, pitch around y and yaw around z,
+    all counterclockwise. The arcsin argument is clipped to [-1, 1].
+    """
+    x, y, z, w = quat_angle
+
+    # Rotation around the x axis
+    roll_x = np.arctan2(+2.0 * (w * x + y * z), +1.0 - 2.0 * (x * x + y * y))
+
+    # Rotation around the y axis
+    sin_pitch = np.clip(+2.0 * (w * y - z * x), -1, 1)
+    pitch_y = np.arcsin(sin_pitch)
+
+    # Rotation around the z axis
+    yaw_z = np.arctan2(+2.0 * (w * z + x * y), +1.0 - 2.0 * (y * y + z * z))
+
+    angles = (roll_x, pitch_y, yaw_z)
+    return angles
+
+
 class Go2Simulation(Node):
     def __init__(self):
         super().__init__("go2_simulation")
@@ -28,13 +57,14 @@ def __init__(self):
         self.clock_publisher = self.create_publisher(Clock, "/clock", 10)
 
         # Timer to publish periodically
-        self.high_level_period = 1.0 / 500  # seconds
-        self.low_level_sub_step = 4
+        self.high_level_period = 1.0 / 50  # seconds
+        self.low_level_sub_step = 24
         self.timer = self.create_timer(self.high_level_period, self.update)
 
         ########################## Camera
         self.camera_period = 1.0 / 10 # seconds
         self.camera_decimation = int(self.camera_period / self.high_level_period)
+        breakpoint()
 
         ########################## Cmd listener
         self.create_subscription(LowCmd, "/lowcmd", self.receive_cmd_cb, 10)
@@ -70,13 +100,144 @@ def __init__(self):
         self.sim_time = Time(seconds=0, nanoseconds=0)
         self.time_delta = Duration(seconds=0, nanoseconds=int(self.high_level_period * 1e9))
 
+        self.init_onnx()
+
+    def init_onnx(self, onnx_path=None):
+        """Load the ONNX policy from ``onnx_path`` (None keeps the original path) and allocate its buffers."""
+        if onnx_path is None:
+            onnx_path = "/home/hamlet/Workspace/reinforcement-learning/inference/./models/wall.onnx"
+        self.onnx_session = rt.InferenceSession(onnx_path)
+
+        self.w_T_b = np.eye(4)
+        self.joint_pos = np.zeros(12)
+        self.joint_vel = np.zeros(12)
+        self.joint_pos_policy = np.zeros(12)
+        self.joint_vel_policy = np.zeros(12)
+
+        self.q0 = np.array([-0.1, 0.8, -1.5, 0.1, 0.8, -1.5, -0.1, 1., -1.5, 0.1, 1., -1.5])
+        self.q_des = self.q0.copy()
+
+        # First two elements are 0, third is the forward speed
+        self.vel_cmd = np.array([0., 0., 0.37])
+        self.env_class = np.array([1, 0])
+
+        self.action_buffer = deque(maxlen=2)
+        self.depth_buffer = deque(maxlen=2)
+
+        # Persistent float32 buffers read and written by forward()
+        self.depth_latent = np.zeros((1, 32), dtype=np.float32)
+        self.vobs = np.zeros((1, 58, 87), dtype=np.float32)
+        self.yaws = np.zeros((1, 2), dtype=np.float32)
+        self.obs = np.zeros((1, 53), dtype=np.float32)
+        self.obs_history = np.zeros((1, 10, 53), dtype=np.float32)
+        self.rnn_hidden_in = np.zeros((1, 1, 512), dtype=np.float32)
+        self.update_depth = np.zeros((1, 1), dtype=np.float32)
+        self.update_yaw = np.ones((1, 1), dtype=np.float32)
+        self.step_counter = np.zeros((1,), dtype=np.float32)
+        self.actions = np.zeros((1, 12), dtype=np.float32)
+
+
+    def forward(self, camera: bool = False):
+        """Run one policy step and return the 12 desired joint positions.
+
+        camera: when True, the depth input is refreshed from the simulator camera.
+        """
+        if self.i == 0:
+            return np.zeros(12)
+
+        robot_id = self.simulator.robot
+        if camera:
+            im = self.simulator.get_camera_image().astype(np.float32)
+            self.vobs[:] = (im / 255.) - 0.5
+        self.update_yaw[:] = 1.0 if camera else 0.0
+        self.update_depth[:] = 1.0 if camera else 0.0
+
+        # Base pose in the world frame, stored as a homogeneous transform
+        w_P_b, w_Q_b = pb.getBasePositionAndOrientation(robot_id)
+        w_P_b = np.array(w_P_b, dtype=np.float32)
+        w_R_b = np.array(pb.getMatrixFromQuaternion(w_Q_b), dtype=np.float32).reshape(3, 3)
+        self.w_T_b[:3, :3] = w_R_b
+        self.w_T_b[:3, 3] = w_P_b
+
+        # Angular velocity rotated from the world frame into the base frame
+        _, ang_vel_w = pb.getBaseVelocity(robot_id)
+        ang_vel_b = w_R_b.T @ np.array(ang_vel_w)
+        contact_states = self.low_msg.foot_force > 20
+
+        roll, pitch, _yaw = euler_from_quaternion(w_Q_b)
+        imu_obs = np.array([roll, pitch])
+
+        # Joint positions expressed relative to the default pose q0
+        q = np.array([ms.q for ms in self.low_msg.motor_state])[:12] - self.q0
+
+        # Finite-difference joint velocity at the update rate (was a literal 50.)
+        self.joint_vel[:] = (q - self.joint_pos) * (1.0 / self.high_level_period)
+        self.joint_pos[:] = q
+
+        obs_data = [
+            1 * ang_vel_b * 0.25,  # 3
+            1 * imu_obs,  # 2
+            [0.0],
+            1 * self.yaws.squeeze(),
+            1 * self.vel_cmd,  # 3
+            1 * self.env_class,  # 2
+            1 * self.joint_pos,
+            1 * (self.joint_vel * 0.05),
+            1 * (self.actions.squeeze()),
+            1 * (contact_states - 0.5),
+        ]
+
+        def clip(a):
+            return np.clip(a, -100.0, 100.0)
+
+        self.obs[:] = np.concatenate(obs_data).reshape(1, 53).astype(np.float32)
+        self.obs[:] = clip(self.obs)
+        self.step_counter[:] = self.i - 1
+
+        # Policy module
+        inputs = {
+            "depth": clip(self.vobs),
+            "depth_latent_in": self.depth_latent,
+            "yaw_in": clip(self.yaws),
+            "obs_proprio": clip(self.obs),
+            "obs_history_in": clip(self.obs_history),
+            "update_depth": self.update_depth,
+            "update_yaw": self.update_yaw,
+            "hidden_states_in": self.rnn_hidden_in,
+            "step_counter": self.step_counter,
+        }
+
+        nn_actions, depth_latent, yaws, obs_history, _ = self.onnx_session.run(
+            ['actions', 'depth_latent_out', 'yaw_out', 'obs_history_out', 'hidden_states_out'], inputs
+        )
+        self.actions[:] = nn_actions.astype(np.float32)
+        self.depth_latent[:] = depth_latent
+        self.yaws[:] = yaws
+        self.obs_history[:] = obs_history
+        # NOTE: hidden_states_out is discarded; self.rnn_hidden_in is not updated here
+
+        return self.q0 + (np.clip(self.actions.squeeze(), -4.8, 4.8) * .25)
+
+
     def update(self):
         ## Control robot
-        q_des = np.array([self.last_cmd_msg.motor_cmd[i].q for i in range(12)])
-        v_des = np.array([self.last_cmd_msg.motor_cmd[i].dq for i in range(12)])
-        tau_des = np.array([self.last_cmd_msg.motor_cmd[i].tau for i in range(12)])
-        kp_des = np.array([self.last_cmd_msg.motor_cmd[i].kp for i in range(12)])
-        kd_des = np.array([self.last_cmd_msg.motor_cmd[i].kd for i in range(12)])
+        if False:
+            q_des = np.array([self.last_cmd_msg.motor_cmd[i].q for i in range(12)])
+            v_des = np.array([self.last_cmd_msg.motor_cmd[i].dq for i in range(12)])
+            tau_des = np.array([self.last_cmd_msg.motor_cmd[i].tau for i in range(12)])
+            kp_des = np.array([self.last_cmd_msg.motor_cmd[i].kp for i in range(12)])
+            kd_des = np.array([self.last_cmd_msg.motor_cmd[i].kd for i in range(12)])
+        else:
+            # Camera update
+            if self.i % self.camera_decimation == 0:
+                q_des = self.forward(camera=True)
+            else:
+                q_des = self.forward(camera=False)
+
+            v_des = np.zeros(12)
+            tau_des = np.zeros(12)
+            kp_des = 40 * np.ones(12)
+            kd_des = 1 * np.ones(12)
 
         for _ in range(self.low_level_sub_step):
             # Iterate to simulate motor internal controller
@@ -112,6 +273,7 @@ def update(self):
         low_msg.foot_force = (14.2 * np.ones(4) + 0.562 * self.f_current).astype(np.int32).tolist()
 
         # Format IMU
+        # PyBullet quaternion, ordered (x, y, z, w)
         quat_xyzw = self.q_current[3:7].tolist()
         l_angular_vel = self.v_current[3:6]  # In local frame
         l_linear_acc = self.a_current[0:3]  # In local frame
@@ -133,6 +295,7 @@ def update(self):
 
         # Publish message
         self.lowstate_publisher.publish(low_msg)
+        self.low_msg = low_msg
 
         ## Send robot pose
         # Odometry / state estimation
@@ -167,10 +330,6 @@ def update(self):
         transform_msg.transform.rotation.w = self.q_current[6]
         self.tf_broadcaster.sendTransform(transform_msg)
 
-        # Camera update
-        if self.i % self.camera_decimation == 0:
-            self.camera_update()
-
         # Check that the simulator is on time
         if self.timer.time_until_next_call() < 0 and self.i % self.camera_decimation != 0:
             ratio = 1.0 - self.timer.time_until_next_call() * 1e-9 / self.high_level_period