-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Closed
Description
Issue Description
Problem Summary
I am currently using tiled camera in isaac sim to train my agent. I wanna to project the ball into the image plane using relative pos and camera intrinsics. However, I found that the ball could not project to the right pixel when it gets closer and far from the central(cx,cy). I have calibrated the camera in isaaclab and got the intrinsic, however there is still some difference between the rendering position and the projected position (can be seen in the video).
Environment Details
- Isaac Lab version: [2.0.0] 3d6f55b
- Task: Custom image data collection environment
- Device: CUDA
Similar Case
Not found yet
Additional Information
This is a video to demonstrate my situation
test_projection.mp4
Here is some part of my collection code
# 重新构建DVX Explorer相机配置
DVXPLORER_CONFIG = CameraConfig(
tiled_camera_cfg=TiledCameraCfg(
prim_path="/World/envs/env_.*/Catcher/base_link/dvx_link/event",
update_period=0.0,
height=320,
width=480,
data_types=["rgb"],
spawn=sim_utils.PinholeCameraCfg().from_intrinsic_matrix(
intrinsic_matrix=[
294.751068115234,
0,
335.108341938485,
0,
294.796234130859,
222.471913085337,
0,
0,
1,
],
height=480,
width=640,
clipping_range=(0.02, 30.0),
),
offset=TiledCameraCfg.OffsetCfg(
pos=(0.01, 0.0, 0.0), rot=(0.9659258, 0.258819, 0, 0), convention="ros"
),
),
calib_intrinsic_matrix=[
640.2224987206213,
0.0,
639.7167381848936,
0.0,
641.4571712153895,
479.3520435710122,
0.0,
0.0,
1.0,
],
calib_distortion_coefficients=[
-0.00856454974761798,
0.027517084419312977,
-0.0004431454940396667,
0.0006407202485259813,
-0.024171418179324868,
],
cam_link_prim_path="/World/envs/env_0/Catcher/base_link/dvx_link/event",
sensor_type="gray",
save_type="image",
)
--------------------------------------------------------
def project_points_to_camera(
self,
ball_pos: torch.Tensor, # [B, 3] 世界坐标系中的点
robot_pos: torch.Tensor, # [B, 3] 相机父坐标系位置
robot_quat: torch.Tensor, # [B, 4] 相机父坐标系四元数
ball_radius: torch.Tensor,
return_normalized: bool = True, # 是否返回归一化坐标
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
将世界坐标系中的点投影到相机平面
Args:
world_points: [B, 3] 世界坐标系中的点位置
camera_parent_pos: [B, 3] 相机父坐标系的世界位置
camera_parent_quat: [B, 4] 相机父坐标系的世界四元数
return_normalized: 是否返回归一化坐标 [0,1]
Returns:
projected_points: [B, 2] 投影后的点位置
visibility_mask: [B] 可见性掩码 (True=可见, False=不可见)
"""
batch_size = ball_pos.shape[0]
# 1. 计算相机在世界坐标系中的位置和姿态
camera_pos_w, camera_quat_w = combine_frame_transforms(
robot_pos, # [B, 3]
robot_quat, # [B, 4]
self.t_body_cam.expand(batch_size, -1), # [B, 3]
self.q_body_cam.expand(batch_size, -1), # [B, 4]
)
# 2. 将世界坐标转换到相机坐标系
points_in_camera, _ = subtract_frame_transforms(
camera_pos_w, camera_quat_w, ball_pos
)
projected_points = project_points(
points_in_camera, self.intrinsic_matrix
).squeeze(0) # [B, 2]
# projected_points = torch.matmul(self.intrinsic_matrix, points_in_camera.unsqueeze(1)) # [B ,3 ,3] * [B ,1 ,3]
distance = torch.norm(points_in_camera, dim=1)
radius_uv = (self.focal_length * ball_radius / distance).unsqueeze(
1
) # [B, 1] 计算球的半径在图像平面上的投影半径
# 对于不可见的点,设置为 [-1, -1]
# projected_points[~visibility_mask] = -1.0
in_front_mask = points_in_camera[:, 2] > 0.01 # 至少1cm在前方
in_bounds_mask = torch.logical_and(
projected_points[:, 0] >= self.roi_w[0],
projected_points[:, 0] < self.roi_w[1],
) & torch.logical_and(
projected_points[:, 1] >= self.roi_h[0],
projected_points[:, 1] < self.roi_h[1],
)
visibility_mask = in_front_mask & in_bounds_mask # [B]
# 3. 组合可见性掩码
projected_points[~visibility_mask] = torch.tensor(
[self.image_height, self.image_width, -1.0], device=self.device
) #
return projected_points, radius_uv # , visibility_mask
---------------------------------------
while simulation_app.is_running():
# progress manager
if sim_time > target_time_list[progress]:
break
# if sim_time > target_time_list[progress] / 3.0:
# _test_action_hover[:, 2] = 0.5 # pitch
# _test_action_hover[:, 1] = 0.4 # 给
# controller
_actions = _test_action_hover
quad_act_ = _actions * _act_std + _act_mean
# pprint( {"quad_act_": quad_act_})
# robot_state = torch.cat([robot.data.root_quat_w , robot.data.root_ang_vel_b ], dim = -1)
robot_state = robot.data.root_ang_vel_b
# mine limit
body_torques_thrust_real = controller.compute_withlimit(
robot_state,
target_thrust=quad_act_[:, 0] * _robot_weight, # need rescale
target_rate=quad_act_[:, 1:],
)
_collective_thrust[..., 2] = body_torques_thrust_real[..., 3].unsqueeze(1)
_body_torques[..., :] = body_torques_thrust_real[..., :3].unsqueeze(1)
# pprint( {"_collective_thrust": _collective_thrust ,"_body_torques": _body_torques })
_force_hover[..., 2] = _robot_weight
robot.set_external_force_and_torque(
_collective_thrust, _body_torques, body_ids=_body_id
) # air drag on body
# robot.set_external_force_and_torque(
# _force_hover, _zero_torques, body_ids=_body_id
# ) # air drag on body
# robot.write_root_pose_to_sim(robot.data.default_root_state[:, :7])
# robot.write_root_velocity_to_sim(robot.data.default_root_state[:, 7:])
robot.write_data_to_sim()
# ball state ( air drag )
# 计算球体阻力 (不带随机化)
ball_aero.params.enable_randomization = True
_ball_air_drag[..., :] = ball_aero.compute_drag(ball_read_velocity).unsqueeze(
1
) # [2,3]
# pprint({"drag_ball": _ball_air_drag})
ball.set_external_force_and_torque(
_ball_air_drag, _zero_torques
) # air drag on body
# robot.set_external_force_and_torque(_zero_thrust, _zero_torques, body_ids=_robot_body_id) # air drag on body
ball.write_data_to_sim()
# perform step
sim.step()
# following could be done automatically by scene.update
tiled_camera.update(sim_dt)
# rs_cam.update(sim_dt)
ball.update(sim_dt)
robot.update(sim_dt)
contact_forces_net_bottom.update(sim_dt, force_recompute=True)
# setup_advanced_camera_properties(camera_prim_path , exposure_time=1.0)
# verify_camera_properties(camera_prim_path)
ball_read_velocity = ball.data.root_lin_vel_b
ball_pixel_uvz, ball_pixel_radius = _camera_tool.project_points_to_camera(
ball_pos=ball.data.root_pos_w,
robot_pos=robot.data.root_pos_w,
robot_quat=robot.data.root_quat_w,
ball_radius=cfg.ball_cfg.spawn.radius,
)
if cfg.my_cam_cfg.sensor_type == "rgb":
# rgb
rgb_image = tiled_camera.data.output["rgb"]
rgb_np = rgb_image[0].cpu().numpy() # [H, W, 3]
bgr_np = cv2.cvtColor(rgb_np, cv2.COLOR_RGB2BGR)
output_dir = "./output/rgb"
elif cfg.my_cam_cfg.sensor_type == "depth":
# depth
depth_image = (
tiled_camera.data.output["distance_to_image_plane"].clamp(0.1, 10.0)
/ 10.0
) # clamp to avoid inf and nan
depth_image_np = depth_image[0].cpu().numpy() * 255.0 # [H, W, 1]
bgr_np = cv2.cvtColor(depth_image_np, cv2.COLOR_GRAY2BGR)
output_dir = "./output/depth"
elif cfg.my_cam_cfg.sensor_type == "gray":
# gray
output_dir = "./output/gray"
rgb_image = tiled_camera.data.output["rgb"]
grayscale = (
0.299 * rgb_image[..., 0]
+ 0.587 * rgb_image[..., 1]
+ 0.114 * rgb_image[..., 2]
)
# grayscale = T.Grayscale(rgb_image)
if cfg.my_cam_cfg.save_type == "image":
# gray_frames_tensor[img_count] = grayscale[0] # [H, W]
gray_np = grayscale[0].cpu().numpy() # [H, W, 3]
gray_np_distorted = cv2.undistort(
src=gray_np,
cameraMatrix=np.array(
cfg.my_cam_cfg.calib_intrinsic_matrix
).reshape(3, 3),
distCoeffs=np.array(cfg.my_cam_cfg.calib_distortion_coefficients),
)
# diff_img = cv2.absdiff(gray_np, gray_np_distorted)
# cv2.imwrite(os.path.join(output_dir, f"{img_count:06d}_diff.png"), diff_img)
bgr_np = cv2.cvtColor(gray_np_distorted, cv2.COLOR_GRAY2BGR)
# bgr_np = 0.2989 * rgb_image[:, 0:1] + 0.5870 * rgb_image[:, 1:2] + 0.1140 * rgb_image[:, 2:3]
elif cfg.my_cam_cfg.save_type == "video":
gray_np = grayscale[0].cpu().numpy().astype(np.uint8) # [H, W, 3]
bgr_np = cv2.cvtColor(gray_np, cv2.COLOR_GRAY2RGB)
video_frame.append(bgr_np) # [1, H, W]
video_writer.write(bgr_np)
if cfg.my_cam_cfg.save_type == "image":
os.makedirs(output_dir, exist_ok=True) # 创建文件夹
filename = os.path.join(output_dir, f"{img_count:06d}.png")
# cv2.imwrite(filename, bgr_np)
u, v, z = (
int(ball_pixel_uvz[..., 0]),
int(ball_pixel_uvz[..., 1]),
(ball_pixel_uvz[..., 2]),
)
poi_u, poi_v = (
int(point_of_interest[0].item()),
int(point_of_interest[1].item()),
)
if (
u > 0
and v > 0
and u < tiled_camera_cfg.width
and v < tiled_camera_cfg.height
and z > 0
and z < 30.0
):
print("Ball pixel position:", u, v, z)
# 获取像素半径 (如果提供,否则使用默认值)
if ball_pixel_radius.item() > 0:
radius = int(ball_pixel_radius.item())
else:
# 如果没有提供半径,使用默认值
radius = 2 # 默认半径
# 绘制红色圆圈
cv2.circle(
bgr_np,
(u, v),
radius,
(0, 0, 255), # 红色 (BGR)
1, # 线宽
)
cv2.circle(
bgr_np,
(poi_u, poi_v), # drawing point of interest
radius,
(255, 0, 0), # 蓝色 (BGR)
1, # 线宽
)
filename = os.path.join(output_dir, f"{img_count:06d}.png")
cv2.imwrite(filename, bgr_np)Could you please investigate this issue? Thank you for your time and consideration!
Metadata
Metadata
Assignees
Labels
No labels