[Question] Question about tiled camera intrinsics #2806

@jingyang-huang

Description

Problem Summary

I am currently using the tiled camera in Isaac Sim to train my agent, and I want to project a ball onto the image plane using its position relative to the camera and the camera intrinsics. However, the ball does not project to the correct pixel: the error grows as the ball moves toward or away from the principal point (cx, cy). I have calibrated the camera in Isaac Lab and obtained the intrinsics, but there is still an offset between the rendered position and the projected position (visible in the video below).
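
For reference, the projection follows the standard pinhole model: a camera-frame point (x, y, z) with z > 0 maps to pixel u = fx * x / z + cx, v = fy * y / z + cy. A minimal sketch of this expectation (the function name is mine, for illustration):

import torch

def pinhole_project(points_cam: torch.Tensor, K: torch.Tensor) -> torch.Tensor:
    # points_cam: [B, 3] points in the camera (optical) frame, +z forward
    # K: [3, 3] intrinsic matrix
    uvw = points_cam @ K.T           # [B, 3] homogeneous pixel coordinates
    return uvw[:, :2] / uvw[:, 2:3]  # perspective division -> [B, 2] pixels (u, v)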

Environment Details

  • Isaac Lab version: 2.0.0 (commit 3d6f55b)
  • Task: Custom image data collection environment
  • Device: CUDA

Similar Case

None found yet.

Additional Information

Here is a video demonstrating the issue:

test_projection.mp4

Here is the relevant part of my data-collection code:

# Rebuild the DVX Explorer camera configuration
DVXPLORER_CONFIG = CameraConfig(
    tiled_camera_cfg=TiledCameraCfg(
        prim_path="/World/envs/env_.*/Catcher/base_link/dvx_link/event",
        update_period=0.0,
        height=320,
        width=480,
        data_types=["rgb"],
        spawn=sim_utils.PinholeCameraCfg().from_intrinsic_matrix(
            intrinsic_matrix=[
                294.751068115234,
                0,
                335.108341938485,
                0,
                294.796234130859,
                222.471913085337,
                0,
                0,
                1,
            ],
            height=480,
            width=640,
            clipping_range=(0.02, 30.0),
        ),
        offset=TiledCameraCfg.OffsetCfg(
            pos=(0.01, 0.0, 0.0), rot=(0.9659258, 0.258819, 0, 0), convention="ros"
        ),
    ),
    calib_intrinsic_matrix=[
        640.2224987206213,
        0.0,
        639.7167381848936,
        0.0,
        641.4571712153895,
        479.3520435710122,
        0.0,
        0.0,
        1.0,
    ],
    calib_distortion_coefficients=[
        -0.00856454974761798,
        0.027517084419312977,
        -0.0004431454940396667,
        0.0006407202485259813,
        -0.024171418179324868,
    ],
    cam_link_prim_path="/World/envs/env_0/Catcher/base_link/dvx_link/event",
    sensor_type="gray",
    save_type="image",
)
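
While writing this up I also notice that the spawn intrinsics are given for a 640x480 sensor (height=480, width=640 in from_intrinsic_matrix), while the TiledCameraCfg renders at width=480, height=320. If the intrinsic matrix needs to be matched to the render resolution manually, a minimal rescaling sketch (the helper name is mine):

import numpy as np

def rescale_intrinsics(K: np.ndarray, src_wh: tuple, dst_wh: tuple) -> np.ndarray:
    # Rescale a 3x3 intrinsic matrix from src (width, height) to dst (width, height).
    K = K.copy()
    K[0, :] *= dst_wh[0] / src_wh[0]  # fx, skew, cx scale with image width
    K[1, :] *= dst_wh[1] / src_wh[1]  # fy, cy scale with image height
    return K

K_spawn = np.array([294.751068115234, 0.0, 335.108341938485,
                    0.0, 294.796234130859, 222.471913085337,
                    0.0, 0.0, 1.0]).reshape(3, 3)
K_render = rescale_intrinsics(K_spawn, (640, 480), (480, 320))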
--------------------------------------------------------
def project_points_to_camera(
        self,
        ball_pos: torch.Tensor,  # [B, 3] points in the world frame
        robot_pos: torch.Tensor,  # [B, 3] position of the camera's parent frame
        robot_quat: torch.Tensor,  # [B, 4] orientation quaternion of the camera's parent frame
        ball_radius: torch.Tensor,
        return_normalized: bool = True,  # whether to return normalized coordinates
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Project points from the world frame onto the camera image plane.

        Args:
            ball_pos: [B, 3] point positions in the world frame
            robot_pos: [B, 3] world position of the camera's parent frame
            robot_quat: [B, 4] world orientation of the camera's parent frame
            ball_radius: ball radius in meters
            return_normalized: whether to return coordinates normalized to [0, 1]

        Returns:
            projected_points: [B, 3] projected pixel positions (u, v, z)
            radius_uv: [B, 1] projected ball radius in pixels
        """
        batch_size = ball_pos.shape[0]

        # 1. Compute the camera pose in the world frame
        camera_pos_w, camera_quat_w = combine_frame_transforms(
            robot_pos,  # [B, 3]
            robot_quat,  # [B, 4]
            self.t_body_cam.expand(batch_size, -1),  # [B, 3]
            self.q_body_cam.expand(batch_size, -1),  # [B, 4]
        )

        # 2. Transform the world-frame points into the camera frame
        points_in_camera, _ = subtract_frame_transforms(
            camera_pos_w, camera_quat_w, ball_pos
        )
        projected_points = project_points(
            points_in_camera, self.intrinsic_matrix
        ).squeeze(0)  # [B, 3] pixel coordinates (u, v) and depth z
        distance = torch.norm(points_in_camera, dim=1)

        radius_uv = (self.focal_length * ball_radius / distance).unsqueeze(
            1
        )  # [B, 1] projected radius of the ball on the image plane
        in_front_mask = points_in_camera[:, 2] > 0.01  # at least 1 cm in front of the camera
        in_bounds_mask = torch.logical_and(
            projected_points[:, 0] >= self.roi_w[0],
            projected_points[:, 0] < self.roi_w[1],
        ) & torch.logical_and(
            projected_points[:, 1] >= self.roi_h[0],
            projected_points[:, 1] < self.roi_h[1],
        )
        visibility_mask = in_front_mask & in_bounds_mask  # [B]
        # 3. Mark invisible points with an out-of-frame sentinel
        projected_points[~visibility_mask] = torch.tensor(
            [self.image_width, self.image_height, -1.0], device=self.device
        )  # u >= width, v >= height, z = -1 -> always fails the bounds check

        return projected_points, radius_uv  # , visibility_mask
---------------------------------------
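As a sanity check on the projection math itself, project_points can be compared against a manual pinhole projection of the same camera-frame points (assuming points_in_camera is [B, 3] and self.intrinsic_matrix is [3, 3]):

# Manual cross-check: a large residual here would point to an intrinsics or
# frame-convention mismatch rather than to project_points itself.
uvw = points_in_camera @ self.intrinsic_matrix.T  # [B, 3] homogeneous pixels
uv_manual = uvw[:, :2] / uvw[:, 2:3]              # perspective division
residual = (uv_manual - projected_points[:, :2]).abs().max()
print("max projection residual (px):", residual.item())
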
while simulation_app.is_running():
        # progress manager
        if sim_time > target_time_list[progress]:
            break

        # if sim_time > target_time_list[progress] / 3.0:
        #     _test_action_hover[:, 2] = 0.5  # pitch
        #     _test_action_hover[:, 1] = 0.4

        # controller
        _actions = _test_action_hover
        quad_act_ = _actions * _act_std + _act_mean
        # pprint( {"quad_act_": quad_act_})
        # robot_state = torch.cat([robot.data.root_quat_w , robot.data.root_ang_vel_b ], dim = -1)
        robot_state = robot.data.root_ang_vel_b
        # apply my own rate/thrust limits
        body_torques_thrust_real = controller.compute_withlimit(
            robot_state,
            target_thrust=quad_act_[:, 0] * _robot_weight,  # need rescale
            target_rate=quad_act_[:, 1:],
        )

        _collective_thrust[..., 2] = body_torques_thrust_real[..., 3].unsqueeze(1)
        _body_torques[..., :] = body_torques_thrust_real[..., :3].unsqueeze(1)
        # pprint( {"_collective_thrust": _collective_thrust ,"_body_torques": _body_torques })
        _force_hover[..., 2] = _robot_weight
        robot.set_external_force_and_torque(
            _collective_thrust, _body_torques, body_ids=_body_id
        )  # apply collective thrust and body torques to the robot
        # robot.set_external_force_and_torque(
        #     _force_hover, _zero_torques, body_ids=_body_id
        # )  # air drag on body

        # robot.write_root_pose_to_sim(robot.data.default_root_state[:, :7])
        # robot.write_root_velocity_to_sim(robot.data.default_root_state[:, 7:])
        robot.write_data_to_sim()
        # ball state: compute the aerodynamic drag on the ball
        ball_aero.params.enable_randomization = True
        _ball_air_drag[..., :] = ball_aero.compute_drag(ball_read_velocity).unsqueeze(
            1
        )  # [2,3]
        # pprint({"drag_ball": _ball_air_drag})
        ball.set_external_force_and_torque(
            _ball_air_drag, _zero_torques
        )  # air drag on body
        # robot.set_external_force_and_torque(_zero_thrust, _zero_torques, body_ids=_robot_body_id) # air drag on body
        ball.write_data_to_sim()
        # perform step
        sim.step()
        # following could be done automatically by scene.update
        tiled_camera.update(sim_dt)
        # rs_cam.update(sim_dt)
        ball.update(sim_dt)
        robot.update(sim_dt)
        contact_forces_net_bottom.update(sim_dt, force_recompute=True)
        # setup_advanced_camera_properties(camera_prim_path , exposure_time=1.0)
        # verify_camera_properties(camera_prim_path)
        ball_read_velocity = ball.data.root_lin_vel_b


        ball_pixel_uvz, ball_pixel_radius = _camera_tool.project_points_to_camera(
            ball_pos=ball.data.root_pos_w,
            robot_pos=robot.data.root_pos_w,
            robot_quat=robot.data.root_quat_w,
            ball_radius=cfg.ball_cfg.spawn.radius,
        )
        if cfg.my_cam_cfg.sensor_type == "rgb":
            # rgb
            rgb_image = tiled_camera.data.output["rgb"]
            rgb_np = rgb_image[0].cpu().numpy()  # [H, W, 3]
            bgr_np = cv2.cvtColor(rgb_np, cv2.COLOR_RGB2BGR)
            output_dir = "./output/rgb"
        elif cfg.my_cam_cfg.sensor_type == "depth":
            # depth
            depth_image = (
                tiled_camera.data.output["distance_to_image_plane"].clamp(0.1, 10.0)
                / 10.0
            )  # clamp to avoid inf and nan
            depth_image_np = depth_image[0].cpu().numpy() * 255.0  # [H, W, 1]
            bgr_np = cv2.cvtColor(depth_image_np, cv2.COLOR_GRAY2BGR)
            output_dir = "./output/depth"

        elif cfg.my_cam_cfg.sensor_type == "gray":
            # gray
            output_dir = "./output/gray"
            rgb_image = tiled_camera.data.output["rgb"]
            grayscale = (
                0.299 * rgb_image[..., 0]
                + 0.587 * rgb_image[..., 1]
                + 0.114 * rgb_image[..., 2]
            )
            # grayscale = T.Grayscale(rgb_image)
            if cfg.my_cam_cfg.save_type == "image":
                # gray_frames_tensor[img_count] = grayscale[0]  # [H, W]

                gray_np = grayscale[0].cpu().numpy()  # [H, W]
                gray_np_distorted = cv2.undistort(
                    src=gray_np,
                    cameraMatrix=np.array(
                        cfg.my_cam_cfg.calib_intrinsic_matrix
                    ).reshape(3, 3),
                    distCoeffs=np.array(cfg.my_cam_cfg.calib_distortion_coefficients),
                )
                # diff_img = cv2.absdiff(gray_np, gray_np_distorted)
                # cv2.imwrite(os.path.join(output_dir, f"{img_count:06d}_diff.png"), diff_img)
                bgr_np = cv2.cvtColor(gray_np_distorted, cv2.COLOR_GRAY2BGR)
                # bgr_np = 0.2989 * rgb_image[:, 0:1] + 0.5870 * rgb_image[:, 1:2] + 0.1140 * rgb_image[:, 2:3]
            elif cfg.my_cam_cfg.save_type == "video":
                gray_np = grayscale[0].cpu().numpy().astype(np.uint8)  # [H, W]
                bgr_np = cv2.cvtColor(gray_np, cv2.COLOR_GRAY2RGB)
                video_frame.append(bgr_np)  # [1, H, W]
                video_writer.write(bgr_np)



        if cfg.my_cam_cfg.save_type == "image":
            os.makedirs(output_dir, exist_ok=True)  # create the output folder
            filename = os.path.join(output_dir, f"{img_count:06d}.png")
            # cv2.imwrite(filename, bgr_np)

            u, v, z = (
                int(ball_pixel_uvz[..., 0]),
                int(ball_pixel_uvz[..., 1]),
                (ball_pixel_uvz[..., 2]),
            )
            poi_u, poi_v = (
                int(point_of_interest[0].item()),
                int(point_of_interest[1].item()),
            )
            if (
                u > 0
                and v > 0
                and u < tiled_camera_cfg.width
                and v < tiled_camera_cfg.height
                and z > 0
                and z < 30.0
            ):
                print("Ball pixel position:", u, v, z)
                # use the projected pixel radius if valid, otherwise a default
                if ball_pixel_radius.item() > 0:
                    radius = int(ball_pixel_radius.item())
                else:
                    radius = 2  # default radius in pixels
                # draw a red circle at the projected ball position
                cv2.circle(
                    bgr_np,
                    (u, v),
                    radius,
                    (0, 0, 255),  # red (BGR)
                    1,  # line width
                )
                # draw a blue circle at the point of interest
                cv2.circle(
                    bgr_np,
                    (poi_u, poi_v),
                    radius,
                    (255, 0, 0),  # blue (BGR)
                    1,  # line width
                )
                filename = os.path.join(output_dir, f"{img_count:06d}.png")
                cv2.imwrite(filename, bgr_np)
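
A side note on the cv2.undistort step above: undistort removes lens distortion, so applying it to the distortion-free Isaac Sim render warps the image away from the pinhole model instead of simulating the real lens. If the intent is to add the calibrated distortion to the clean render, a remap-based sketch (my own approach; K and dist are the calibrated values from the config above):

import cv2
import numpy as np

def apply_distortion(img: np.ndarray, K: np.ndarray, dist: np.ndarray) -> np.ndarray:
    # For each pixel of the output (distorted) image, look up its source
    # location in the clean (undistorted) render, then remap.
    h, w = img.shape[:2]
    u, v = np.meshgrid(np.arange(w, dtype=np.float32), np.arange(h, dtype=np.float32))
    pts = np.stack([u, v], axis=-1).reshape(-1, 1, 2)
    # undistortPoints maps distorted pixel coords to normalized undistorted coords
    norm = cv2.undistortPoints(pts, K, dist).reshape(h, w, 2)
    map_x = (norm[..., 0] * K[0, 0] + K[0, 2]).astype(np.float32)  # back to pixels
    map_y = (norm[..., 1] * K[1, 1] + K[1, 2]).astype(np.float32)
    return cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR)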

Could you please investigate this issue? Thank you for your time and consideration!
