Add input_ref_frame = base option for whole body ik and devices (#754)

kevin-thankyou-lin · web-flow · commit d1cab0bf5979 · 2025-10-23T00:34:49.000-05:00
* Add input_ref_frame = base option for whole body ik and devices

* Correct goal_update_name: desired -&gt; target

* Add more finegrained input_ref_frame for ik controller

* Update function name to controller_base_pose

* Check robot has torso

* Allow torso action as part of base actions

* Enable torso action for joint position delta controller

* Add assert for WHOLE_BODY_IK has 1 robot only

* Format

* Update default goal update mode
diff --git a/robosuite/controllers/composite/composite_controller.py b/robosuite/controllers/composite/composite_controller.py
@@ -474,5 +474,6 @@ def _init_joint_action_policy(self):
             max_dq_torso=self.composite_controller_specific_config.get("ik_max_dq_torso", 0.2),
             input_rotation_repr=self.composite_controller_specific_config.get("ik_input_rotation_repr", "axis_angle"),
             input_type=self.composite_controller_specific_config.get("ik_input_type", "axis_angle"),
+            input_ref_frame=self.composite_controller_specific_config.get("ik_input_ref_frame", "world"),
             debug=self.composite_controller_specific_config.get("verbose", False),
         )
diff --git a/robosuite/controllers/config/robots/default_pandaomron_whole_body_ik.json b/robosuite/controllers/config/robots/default_pandaomron_whole_body_ik.json
@@ -0,0 +1,116 @@
+{
+    "type": "WHOLE_BODY_IK",
+    "composite_controller_specific_configs": {
+        "ref_name": ["gripper0_right_grip_site", "gripper0_left_grip_site"],
+        "interpolation": null,
+        "actuation_part_names": ["right", "left"],
+        "max_dq": 4,
+        "nullspace_joint_weights": {
+            "robot0_torso_waist_yaw": 100.0,
+            "robot0_torso_waist_pitch": 100.0,
+            "robot0_torso_waist_roll": 500.0,
+            "robot0_l_shoulder_pitch": 4.0,
+            "robot0_r_shoulder_pitch": 4.0,
+            "robot0_l_shoulder_roll": 3.0,
+            "robot0_r_shoulder_roll": 3.0,
+            "robot0_l_shoulder_yaw": 2.0,
+            "robot0_r_shoulder_yaw": 2.0
+        },
+        "ik_pseudo_inverse_damping": 5e-2,
+        "ik_integration_dt": 1e-1,
+        "ik_max_dq": 4.0,
+        "ik_max_dq_torso": 0.2,
+        "ik_input_type": "absolute",
+        "ik_input_ref_frame": "mobilebase0_base",
+        "ik_input_rotation_repr": "axis_angle",
+        "verbose": false
+    },
+    "body_parts": {
+        "arms": {
+            "right": {
+                "type" : "JOINT_POSITION",
+                "input_max": 1,
+                "input_min": -1,
+                "input_type": "absolute",
+                "output_max": 0.5,
+                "output_min": -0.5,
+                "kd": 200,
+                "kv": 200,
+                "kp": 1000,
+                "velocity_limits": [-1,1],
+                "kp_limits": [0, 1000],
+                "interpolation": null,
+                "ramp_ratio": 0.2,
+                "gripper": {
+                    "type": "GRIP"
+                }
+            },
+            "left": {
+                "type" : "JOINT_POSITION",
+                "input_max": 1,
+                "input_min": -1,
+                "input_type": "absolute",
+                "output_max": 0.5,
+                "output_min": -0.5,
+                "kd": 200,
+                "kv": 200,
+                "kp": 1000,
+                "velocity_limits": [-1,1],
+                "kp_limits": [0, 1000],
+                "interpolation": null,
+                "ramp_ratio": 0.2,
+                "gripper": {
+                    "type": "GRIP"
+                }
+            }
+        },
+        "torso": {
+            "type" : "JOINT_POSITION",
+            "input_max": 1,
+            "input_min": -1,
+            "input_type": "absolute",
+            "output_max": 0.5,
+            "output_min": -0.5,
+            "kd": 200,
+            "kv": 200,
+            "kp": 1000,
+            "velocity_limits": [-1,1],
+            "kp_limits": [0, 1000],
+            "interpolation": null,
+            "ramp_ratio": 0.2
+        },
+        "head": {
+            "type" : "JOINT_POSITION",
+            "input_max": 1,
+            "input_min": -1,
+            "input_type": "absolute",
+            "output_max": 0.5,
+            "output_min": -0.5,
+            "kd": 200,
+            "kv": 200,
+            "kp": 1000,
+            "velocity_limits": [-1,1],
+            "kp_limits": [0, 1000],
+            "interpolation": null,
+            "ramp_ratio": 0.2
+        },
+        "base": {
+            "type" : "JOINT_VELOCITY",
+            "interpolation": null
+        },
+        "legs": {
+            "type": "JOINT_POSITION",
+            "input_max": 1,
+            "input_min": -1,
+            "output_max": 0.5,
+            "output_min": -0.5,
+            "kd": 200,
+            "kv": 200,
+            "kp": 1000,
+            "velocity_limits": [-1,1],
+            "kp_limits": [0, 1000],
+            "interpolation": null,
+            "ramp_ratio": 0.2
+        }
+    }
+}
diff --git a/robosuite/devices/device.py b/robosuite/devices/device.py
@@ -34,6 +34,7 @@ def _reset_internal_state(self):
         self.base_modes = [False] * len(self.all_robot_arms)
 
         self._prev_target = {arm: None for arm in self.all_robot_arms[self.active_robot]}
+        self._prev_torso_target = None
 
     @property
     def active_arm(self):
@@ -71,7 +72,7 @@ def get_controller_state(self) -> Dict:
     def _postprocess_device_outputs(self, dpos, drotation):
         raise NotImplementedError
 
-    def input2action(self, mirror_actions=False) -> Optional[Dict]:
+    def input2action(self, mirror_actions=False, goal_update_mode="target") -> Optional[Dict]:
         """
         Converts an input from an active device into a valid action sequence that can be fed into an env.step() call
 
@@ -80,6 +81,8 @@ def input2action(self, mirror_actions=False) -> Optional[Dict]:
         Args:
             mirror_actions (bool): actions corresponding to viewing robot from behind.
                 first axis: left/right. second axis: back/forward. third axis: down/up.
+            goal_update_mode (str): the mode to update the goal in. Can be 'target' or 'achieved'.
+            If 'target', the goal is updated based on the current target goal. If 'achieved', the goal is updated based on the current achieved state.
 
         Returns:
             Optional[Dict]: Dictionary of actions to be fed into env.step()
@@ -134,6 +137,7 @@ def input2action(self, mirror_actions=False) -> Optional[Dict]:
                 robot,
                 arm,
                 norm_delta=np.zeros(6),
+                goal_update_mode=goal_update_mode,
             )
             ac_dict[f"{arm}_abs"] = arm_action["abs"]
             ac_dict[f"{arm}_delta"] = arm_action["delta"]
@@ -144,17 +148,20 @@ def input2action(self, mirror_actions=False) -> Optional[Dict]:
             if base_mode is True:
                 arm_norm_delta = np.zeros(6)
                 base_ac = np.array([dpos[0], dpos[1], drotation[2]])
-                torso_ac = np.array([dpos[2]])
+                device_torso_input = dpos[2]  # Use vertical movement for torso in base mode
             else:
                 arm_norm_delta = np.concatenate([dpos, drotation])
                 base_ac = np.zeros(3)
-                torso_ac = np.zeros(1)
+                device_torso_input = 0.0  # No torso input when not in base mode
 
             ac_dict["base"] = base_ac
-            # ac_dict["torso"] = torso_ac
             ac_dict["base_mode"] = np.array([1 if base_mode is True else -1])
         else:
             arm_norm_delta = np.concatenate([dpos, drotation])
+            device_torso_input = 0.0  # No torso input for non-mobile robots by default
+
+        if hasattr(robot, "torso") and robot.torso is not None:
+            ac_dict["torso"] = self.get_torso_action(robot, device_torso_input)
 
         # populate action dict items for arm and grippers
         arm_action = self.get_arm_action(
@@ -183,7 +190,7 @@ def get_arm_action(self, robot, arm, norm_delta, goal_update_mode="target"):
         assert goal_update_mode in [
             "achieved",
             "target",
-        ]  # update next target either based on achieved pose or current target pose
+        ], f"goal_update_mode must be either 'achieved' or 'target', got {goal_update_mode}" # update next target either based on achieved pose or current target pose
 
         # TODO: the logic between OSC and while body based ik is fragmented right now. Unify
         if isinstance(robot.part_controllers[arm], OperationalSpaceController):
@@ -249,6 +256,10 @@ def get_arm_action(self, robot, arm, norm_delta, goal_update_mode="target"):
                 pos = self._prev_target[arm][0:3].copy()
                 ori = T.quat2mat(T.axisangle2quat(self._prev_target[arm][3:6].copy()))
 
+            ref_frame = self.env.robots[0].composite_controller.composite_controller_specific_config.get(
+                "ik_input_ref_frame", "world"
+            )
+
             delta_action = norm_delta.copy()
             delta_action[0:3] *= 0.05
             delta_action[3:6] *= 0.15
@@ -262,9 +273,63 @@ def get_arm_action(self, robot, arm, norm_delta, goal_update_mode="target"):
             abs_action = np.concatenate([new_pos, new_axisangle])
             self._prev_target[arm] = abs_action.copy()
 
+            # convert to be w.r.t base frame
+            if ref_frame != "world":
+                # convert to matrix format
+                abs_action_mat = T.make_pose(
+                    translation=abs_action[0:3],
+                    rotation=T.quat2mat(T.axisangle2quat(abs_action[3:6])),
+                )
+                delta_action_mat = T.make_pose(
+                    translation=delta_action[0:3],
+                    rotation=T.quat2mat(T.axisangle2quat(delta_action[3:6])),
+                )
+                abs_action_base_mat = self.env.robots[0].composite_controller.joint_action_policy.transform_pose(
+                    src_frame_pose=abs_action_mat,
+                    src_frame="world",
+                    dst_frame=ref_frame,
+                )
+                delta_action_base_mat = self.env.robots[0].composite_controller.joint_action_policy.transform_pose(
+                    src_frame_pose=delta_action_mat,
+                    src_frame="world",
+                    dst_frame=ref_frame,
+                )
+                # get the new delta action and abs action position and orientation from the matrix
+                delta_action = np.concatenate(
+                    [delta_action_base_mat[:3, 3], T.quat2axisangle(T.mat2quat(delta_action_base_mat[:3, :3]))]
+                )
+                abs_action = np.concatenate(
+                    [abs_action_base_mat[:3, 3], T.quat2axisangle(T.mat2quat(abs_action_base_mat[:3, :3]))]
+                )
+
             return {
                 "delta": delta_action,
                 "abs": abs_action,
             }
         else:
             raise NotImplementedError
+
+    def get_torso_action(self, robot, device_input):
+        """Generate torso action from device input"""
+        if robot.torso is None:
+            return np.zeros(1)
+
+        torso_controller = robot.part_controllers[robot.torso]
+
+        if torso_controller.name == "JOINT_POSITION":
+            if torso_controller.input_type == "delta":
+                scale = 0.2
+                return np.array([device_input * scale])
+            else:
+                scale = 0.01
+                target = self._prev_torso_target if self._prev_torso_target is not None else torso_controller.goal_qpos
+                if abs(device_input) < 1e-6:
+                    action = target
+                else:
+                    action = target + np.array([device_input * scale])
+                self._prev_torso_target = action.copy()
+                return action
+        elif torso_controller.name == "JOINT_VELOCITY":
+            return np.array([device_input * 0.5])
+        else:
+            return np.zeros(1)
diff --git a/robosuite/devices/mjgui.py b/robosuite/devices/mjgui.py
@@ -116,13 +116,16 @@ def _reset_internal_state(self):
             target_mat = self.env.sim.data.site_xmat[self.env.sim.model.site_name2id(site_name)]
             set_mocap_pose(self.env.sim, target_pos, target_mat, f"{target_name_prefix}_eef_target")
 
-    def input2action(self) -> Dict[str, np.ndarray]:
+    def input2action(self, goal_update_mode="desired") -> Dict[str, np.ndarray]:
         """
         Uses mocap body poses to determine action for robot. Obtain input_type
         (i.e. absolute actions or delta actions) and input_ref_frame (i.e. world frame, base frame or eef frame)
         from the controller itself.
 
         """
+        assert (
+            goal_update_mode == "desired"
+        ), "goal_update_mode must be 'desired' for MJGUI: targets are based off the pose of the mocap body."
         # TODO: unify this logic to be independent from controller type.
         action: Dict[str, np.ndarray] = {}
         gripper_dof = self.env.robots[0].gripper[self.active_end_effector].dof
@@ -131,20 +134,6 @@ def input2action(self) -> Dict[str, np.ndarray]:
             target_name_prefix = "right" if "right" in site_name else "left"  # hardcoded for now
             target_pos_world, target_ori_mat_world = get_mocap_pose(self.env.sim, f"{target_name_prefix}_eef_target")
 
-            if isinstance(self.env.robots[0].composite_controller, WholeBodyIK):
-                assert (
-                    self.env.robots[0].composite_controller.composite_controller_specific_config.get(
-                        "ik_input_ref_frame", "world"
-                    )
-                    == "world"
-                ), ("Only support world frame for MJGui teleop for now. " "Please modify the controller configs.")
-                assert (
-                    self.env.robots[0].composite_controller.composite_controller_specific_config.get(
-                        "ik_input_type", "absolute"
-                    )
-                    == "absolute"
-                ), ("Only support absolute actions for MJGui teleop for now. " "Please modify the controller configs.")
-            # check if need to update frames
             if isinstance(self.env.robots[0].composite_controller, WholeBody):
                 # TODO: should be more general
                 if (
diff --git a/robosuite/scripts/collect_human_demonstrations.py b/robosuite/scripts/collect_human_demonstrations.py
@@ -20,7 +20,7 @@
 from robosuite.wrappers import DataCollectionWrapper, VisualizationWrapper
 
 
-def collect_human_trajectory(env, device, arm, max_fr):
+def collect_human_trajectory(env, device, arm, max_fr, goal_update_mode):
     """
     Use the device (keyboard or SpaceNav 3D mouse) to collect a demonstration.
     The rollout trajectory is saved to files in npz format.
@@ -60,7 +60,7 @@ def collect_human_trajectory(env, device, arm, max_fr):
         active_robot = env.robots[device.active_robot]
 
         # Get the newest action
-        input_ac_dict = device.input2action()
+        input_ac_dict = device.input2action(goal_update_mode=goal_update_mode)
 
         # If action is none, then this a reset so we should break
         if input_ac_dict is None:
@@ -279,6 +279,15 @@ def gather_demonstrations_as_hdf5(directory, out_dir, env_info):
         default=False,
         help="(DualSense Only)Reverse the effect of the x and y axes of the joystick.It is used to handle the case that the left/right and front/back sides of the view are opposite to the LX and LY of the joystick(Push LX up but the robot move left in your view)",
     )
+    parser.add_argument(
+        "--goal_update_mode",
+        type=str,
+        default="target",
+        choices=["target", "achieved"],
+        help="Used by the device to get the arm's actions. The mode to update the goal in. Can be 'target' or 'achieved'. If 'target', the goal is updated based on the current target pose. "
+        "If 'achieved', the goal is updated based on the current achieved state. "
+        "We recommend using 'achieved' (and input_ref_frame='base') if collecting demonstrations with a mobile base robot.",
+    )
     args = parser.parse_args()
 
     # Get controller config
@@ -291,6 +300,10 @@ def gather_demonstrations_as_hdf5(directory, out_dir, env_info):
         # mink-speicific import. requires installing mink
         from robosuite.examples.third_party_controller.mink_controller import WholeBodyMinkIK
 
+    # if WHOLE BODY IK; assert only one robot
+    if controller_config["type"] == "WHOLE_BODY_IK":
+        assert len(args.robots) == 1, "Whole Body IK only supports one robot"
+
     # Create argument configuration
     config = {
         "env_name": args.environment,
@@ -366,5 +379,5 @@ def gather_demonstrations_as_hdf5(directory, out_dir, env_info):
 
     # collect demonstrations
     while True:
-        collect_human_trajectory(env, device, args.arm, args.max_fr)
+        collect_human_trajectory(env, device, args.arm, args.max_fr, args.goal_update_mode)
         gather_demonstrations_as_hdf5(tmp_directory, new_dir, env_info)
diff --git a/robosuite/utils/ik_utils.py b/robosuite/utils/ik_utils.py

Original file line number	Diff line number	Diff line change
`@@ -474,5 +474,6 @@ def _init_joint_action_policy(self):`
`474`	`474`	`max_dq_torso=self.composite_controller_specific_config.get("ik_max_dq_torso", 0.2),`
`475`	`475`	`input_rotation_repr=self.composite_controller_specific_config.get("ik_input_rotation_repr", "axis_angle"),`
`476`	`476`	`input_type=self.composite_controller_specific_config.get("ik_input_type", "axis_angle"),`
	`477`	`+ input_ref_frame=self.composite_controller_specific_config.get("ik_input_ref_frame", "world"),`
`477`	`478`	`debug=self.composite_controller_specific_config.get("verbose", False),`
`478`	`479`	`)`