import cv2
import numpy as np
import open3d as o3d
import roma
import torch
from scipy.spatial import cKDTree

def depth_map_to_pcl(depth_map, cam_fov):
    """
    Converts a depth map to a point cloud using PyTorch.

    Args:
        depth_map (torch.Tensor): Depth map of shape (H, W).
        cam_fov (float): Horizontal camera field of view in degrees.

    Returns:
        torch.Tensor: Point cloud of shape (H*W, 3).
        torch.Tensor: Intrinsic parameters [f_x, f_y, W, H].
    """
    # Infer camera intrinsics using the pinhole model
    H, W = depth_map.shape
    fov_h_rad = torch.deg2rad(torch.tensor(cam_fov))
    f_x = W / (2 * torch.tan(fov_h_rad / 2))
    f_y = f_x  # Assuming square pixels
    c_x, c_y = W / 2, H / 2
    # Store a tensor of intrinsics
    intrinsics = torch.tensor([f_x.item(), f_y.item(), W, H], dtype=torch.float32)
    # Create pixel coordinate grid
    u = torch.arange(W, dtype=torch.float32).unsqueeze(0).expand(H, W)
    v = torch.arange(H, dtype=torch.float32).unsqueeze(1).expand(H, W)
    # Convert to normalized coordinates
    x = (u - c_x) / f_x
    y = (v - c_y) / f_y
    # Compute 3D coordinates in camera frame (the camera looks along -Z)
    Z = -depth_map  # Negative sign due to camera coordinate convention
    X = -x * Z  # Negative sign due to camera coordinate convention
    Y = y * Z
    # Stack to form the point cloud, of size (H*W, 3)
    point_cloud = torch.stack([X, Y, Z], dim=-1).reshape(-1, 3)
    return point_cloud, intrinsics
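
# A minimal usage sketch (hypothetical values): a 2x2 depth map at 90 degrees
# horizontal FOV gives f_x = 2 / (2 * tan(45 deg)) = 1, a (4, 3) cloud, and
# intrinsics [f_x, f_y, W, H]:
#
#   pcl, K = depth_map_to_pcl(torch.ones(2, 2), cam_fov=90.0)
#   # pcl.shape == (4, 3); K == tensor([1., 1., 2., 2.])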


def quat_to_4x4_homog(pos, quat):
    """
    Converts a position and unit quaternion into a 4x4 homogeneous transformation matrix.

    Args:
        pos (array-like or torch.Tensor): Translation vector of shape (3,).
        quat (array-like or torch.Tensor): Unit quaternion [x, y, z, w] of shape (4,).

    Returns:
        torch.Tensor: A 4x4 homogeneous transformation matrix.

    Notes:
        - Assumes the quaternion follows the (x, y, z, w) convention.
        - Uses `roma.unitquat_to_rotmat()` to convert the quaternion to a rotation matrix.
        - The output matrix can be used to transform points from one coordinate frame to another.
    """
    quat = torch.tensor(quat, dtype=torch.float32)
    pos = torch.tensor(pos, dtype=torch.float32)
    rot = roma.unitquat_to_rotmat(quat)  # Uses the xyzw convention, as does the get_pos_quat function
    homog = torch.eye(4, dtype=torch.float32)
    homog[:3, :3] = rot
    homog[:3, 3] = pos
    return homog
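
# Convention check (hypothetical values): in xyzw order the identity unit
# quaternion is [0, 0, 0, 1], so the result is a pure translation:
#
#   quat_to_4x4_homog([1.0, 2.0, 3.0], [0.0, 0.0, 0.0, 1.0])
#   # tensor([[1., 0., 0., 1.],
#   #         [0., 1., 0., 2.],
#   #         [0., 0., 1., 3.],
#   #         [0., 0., 0., 1.]])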


def transform_to_world(T_cam_world, point_cloud):
    """
    Transforms a point cloud from the camera frame to the world frame using a homogeneous transformation matrix.

    Args:
        T_cam_world (torch.Tensor): A 4x4 homogeneous transformation matrix representing the camera pose in the world frame.
        point_cloud (torch.Tensor): A tensor of shape (N, 3) containing 3D points in the camera frame.

    Returns:
        torch.Tensor: A tensor of shape (N, 3) containing the transformed 3D points in the world frame.

    Notes:
        - The function first converts the point cloud to homogeneous coordinates by appending a column of ones.
        - It then applies the transformation using matrix multiplication.
        - The final result extracts only the (x, y, z) coordinates from the homogeneous output.
    """
    point_cloud_extended = torch.cat((point_cloud, torch.ones((point_cloud.shape[0], 1))), dim=1)
    transformed_point_cloud = point_cloud_extended @ T_cam_world.T
    transformed_point_cloud = transformed_point_cloud[:, :3]
    return transformed_point_cloud
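
# Usage sketch (hypothetical values): with the pure-translation pose from the
# example above, every camera-frame point is simply shifted by the translation:
#
#   T = quat_to_4x4_homog([1.0, 2.0, 3.0], [0.0, 0.0, 0.0, 1.0])
#   transform_to_world(T, torch.zeros(1, 3))
#   # tensor([[1., 2., 3.]])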


def find_common_points(pcl_0, pcl_1, threshold=0.01):
    """
    Identifies common points between two point clouds by finding nearest neighbors within a specified threshold.

    Args:
        pcl_0 (numpy.ndarray): A point cloud of shape (N, 3).
        pcl_1 (numpy.ndarray): A second point cloud of shape (M, 3).
        threshold (float, optional): The maximum allowed distance between matched points. Defaults to 0.01.

    Returns:
        numpy.ndarray: A filtered point cloud containing only the points from `pcl_0` that have a match in `pcl_1` within the threshold.

    Notes:
        - Uses a KDTree for fast nearest-neighbor search.
        - Points in `pcl_0` are retained if they have a neighbor in `pcl_1` within the given threshold.
        - This method assumes both point clouds are already aligned in the same coordinate frame.
    """
    tree = cKDTree(pcl_1)
    distances, _ = tree.query(pcl_0, distance_upper_bound=threshold)
    mask = distances < threshold
    return pcl_0[mask]
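
# Usage sketch (hypothetical values): points offset by less than the threshold
# are kept; larger offsets are dropped:
#
#   a = np.zeros((1, 3))
#   find_common_points(a, a + 0.005, threshold=0.01).shape  # (1, 3)
#   find_common_points(a, a + 0.5, threshold=0.01).shape    # (0, 3)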


def project_to_image(point_cloud, intrinsics, T_cam_world, method="torch"):
    """
    Projects a 3D point cloud onto a 2D image plane using camera intrinsics and extrinsics.

    Args:
        point_cloud (torch.Tensor): A tensor of shape (N, 3) representing 3D points in the world coordinate frame.
        intrinsics (list, tuple, or torch.Tensor): Camera intrinsics [f_x, f_y, W, H], where:
            - f_x, f_y: Focal lengths in pixels.
            - W, H: Image width and height.
        T_cam_world (torch.Tensor): A 4x4 homogeneous transformation matrix representing the camera pose in the world frame.
        method (str, optional): Projection method. Choose between:
            - "torch": Uses PyTorch-based matrix operations for projection.
            - "opencv": Uses OpenCV's `cv2.projectPoints` for projection.
            Default is "torch".

    Returns:
        torch.Tensor: A tensor of shape (N, 2) containing the 2D pixel coordinates of the projected points.

    Raises:
        ValueError: If an invalid method is specified.

    Notes:
        - The function first transforms the 3D points from the world frame into the camera frame.
        - The "torch" method applies a perspective projection using matrix multiplications.
        - The "opencv" method uses Rodrigues' rotation formula and OpenCV's `cv2.projectPoints`.
        - The x-coordinates are flipped to account for differences in coordinate conventions.
        - The function assumes no lens distortion (distCoeffs set to an empty array in OpenCV).
    """
    f_x, f_y, W, H = (float(v) for v in intrinsics)
    if method == "torch":
        # Calculate transformation from world back to cam0's frame
        T_world_cam = torch.linalg.inv(T_cam_world)
        # Move points from world into cam0's frame
        point_cloud_extended = torch.cat((point_cloud, torch.ones((point_cloud.shape[0], 1))), dim=1)
        transformed_point_cloud = (point_cloud_extended @ T_world_cam.T)[:, :3]  # Extract XYZ
        # Project into ideal camera via perspective transformation
        # NOTE - this clone is redundant, but has been kept for readability
        pixel_coords = transformed_point_cloud.clone()  # (N, 3)
        # Map the ideal image into the real image using the intrinsic matrix
        K = torch.tensor([
            [f_x,   0, W / 2],
            [  0, f_y, H / 2],
            [  0,   0,     1],
        ], dtype=torch.float32)
        # Apply intrinsics
        pixel_coords = (K @ pixel_coords.T).T  # (N, 3)
        # Convert from homogeneous to Cartesian by dividing by depth (Z)
        pixel_coords = pixel_coords[:, :2] / pixel_coords[:, 2:3]
    elif method == "opencv":
        # Calculate transformation from world back to cam0's frame
        T_world_cam = torch.linalg.inv(T_cam_world)
        # Put everything into OpenCV format
        rotation = T_world_cam[:3, :3]
        rotvec = roma.rotmat_to_rotvec(rotation)
        translation = T_world_cam[:3, 3]
        # Define intrinsic matrix K
        K = torch.zeros([3, 3])
        K[0, 0], K[1, 1], K[0, 2], K[1, 2] = f_x, f_y, W / 2, H / 2
        # Project points
        pixel_coords, _ = cv2.projectPoints(
            objectPoints=np.array(point_cloud, dtype=np.float32),
            rvec=np.array(rotvec, dtype=np.float32),
            tvec=np.array(translation, dtype=np.float32),
            cameraMatrix=np.array(K, dtype=np.float32),
            distCoeffs=np.array([])
        )
        # Convert back to a PyTorch tensor
        pixel_coords = torch.tensor(pixel_coords)
        # Remove the middle dimension that cv2.projectPoints creates
        pixel_coords = pixel_coords.squeeze(1)
    else:
        raise ValueError("Invalid method. Choose 'torch' or 'opencv'.")
    # Flip x coords due to difference in coordinate convention
    pixel_coords[:, 0] = W - pixel_coords[:, 0]
    return pixel_coords
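
# Sanity-check sketch (hypothetical values): a point on the optical axis, at
# Z = -1 under the negative-Z camera convention used above, lands at the image
# centre once the final x-flip is applied:
#
#   p = torch.tensor([[0.0, 0.0, -1.0]])
#   project_to_image(p, [100.0, 100.0, 64, 48], torch.eye(4), method="torch")
#   # tensor([[32., 24.]])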


def remove_border_points(ps_0, ps_1, intrinsics_0, intrinsics_1):
    """
    Filters out points that fall outside the valid image boundaries in both camera views.

    Args:
        ps_0 (torch.Tensor): A tensor of shape (N, 2) representing pixel coordinates in the first image.
        ps_1 (torch.Tensor): A tensor of shape (N, 2) representing pixel coordinates in the second image.
        intrinsics_0 (list or tuple): Camera intrinsics [f_x, f_y, W, H] for the first camera.
        intrinsics_1 (list or tuple): Camera intrinsics [f_x, f_y, W, H] for the second camera.

    Returns:
        tuple:
            - ps_0 (torch.Tensor): Filtered pixel coordinates for the first image.
            - ps_1 (torch.Tensor): Filtered pixel coordinates for the second image.

    Notes:
        - First, points in `ps_0` are filtered based on the image size of the first camera.
        - Corresponding points in `ps_1` are also filtered accordingly.
        - The process is then repeated for `ps_1`, ensuring that only points within both image boundaries remain.
    """
    _, _, W, H = intrinsics_0
    mask_0 = (
        (ps_0[:, 0] >= 0) & (ps_0[:, 0] < W) &  # Within image width
        (ps_0[:, 1] >= 0) & (ps_0[:, 1] < H)    # Within image height
    )
    ps_0 = ps_0[mask_0]
    ps_1 = ps_1[mask_0]
    _, _, W, H = intrinsics_1
    mask_1 = (
        (ps_1[:, 0] >= 0) & (ps_1[:, 0] < W) &
        (ps_1[:, 1] >= 0) & (ps_1[:, 1] < H)
    )
    ps_0 = ps_0[mask_1]
    ps_1 = ps_1[mask_1]
    return ps_0, ps_1
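
# Usage sketch (hypothetical values; the focal lengths are unused here): only
# correspondences that fall inside both 64x48 images survive:
#
#   ps_0 = torch.tensor([[10.0, 10.0], [-5.0, 10.0]])
#   ps_1 = torch.tensor([[10.0, 10.0], [10.0, 10.0]])
#   remove_border_points(ps_0, ps_1, [0, 0, 64, 48], [0, 0, 64, 48])
#   # (tensor([[10., 10.]]), tensor([[10., 10.]]))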


def visualise_cams_clouds(point_cloud_0=None, camera_0=None, point_cloud_1=None, camera_1=None):
    """
    Visualizes one or two point clouds along with their corresponding camera coordinate frames
    using Open3D.

    Args:
        point_cloud_0 (numpy.ndarray or torch.Tensor, optional):
            The first point cloud of shape (N, 3). Colored red.
        camera_0 (list of o3d.geometry.Geometry, optional):
            A list of Open3D geometries representing the first camera's coordinate frame.
        point_cloud_1 (numpy.ndarray or torch.Tensor, optional):
            The second point cloud of shape (M, 3). Colored green.
        camera_1 (list of o3d.geometry.Geometry, optional):
            A list of Open3D geometries representing the second camera's coordinate frame.

    Behavior:
        - If `point_cloud_1` and `camera_1` are provided, both point clouds and cameras are displayed.
        - If only `point_cloud_0` and `camera_0` are provided, only the first set is displayed.
        - The first point cloud is colored red, and the second is colored green.

    Returns:
        None. The function launches an interactive Open3D visualization window.

    Notes:
        - The function assumes that the camera coordinate frames are precomputed Open3D geometries.
        - Point clouds are converted into Open3D format before visualization.
    """
    camera_0 = camera_0 if camera_0 is not None else []
    camera_1 = camera_1 if camera_1 is not None else []
    pcd0 = o3d.geometry.PointCloud()
    pcd0.points = o3d.utility.Vector3dVector(np.asarray(point_cloud_0, dtype=np.float64))
    pcd0.paint_uniform_color([1, 0, 0])
    if point_cloud_1 is None:
        o3d.visualization.draw_geometries([pcd0] + camera_0)
    else:
        pcd1 = o3d.geometry.PointCloud()
        pcd1.points = o3d.utility.Vector3dVector(np.asarray(point_cloud_1, dtype=np.float64))
        pcd1.paint_uniform_color([0, 1, 0])
        o3d.visualization.draw_geometries([pcd0, pcd1] + camera_0 + camera_1)
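

if __name__ == "__main__":
    # Minimal end-to-end sketch on synthetic data. All values below are
    # hypothetical and chosen purely for illustration; no Open3D window is
    # opened (call visualise_cams_clouds manually to inspect the clouds).
    depth_map = torch.full((48, 64), 2.0)  # a flat wall 2 m in front of the camera

    # Back-project the depth map into a camera-frame point cloud.
    pcl_cam, intrinsics = depth_map_to_pcl(depth_map, cam_fov=90.0)

    # A camera pose: identity rotation (xyzw unit quaternion) plus a small
    # translation along x.
    T_cam_world = quat_to_4x4_homog([0.1, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0])

    # Move the cloud into the world frame, then re-project it into the camera.
    pcl_world = transform_to_world(T_cam_world, pcl_cam)
    pixels = project_to_image(pcl_world, intrinsics, T_cam_world, method="torch")

    # Drop projections that fall outside the image (the same view is used
    # twice here, purely to exercise the function).
    ps_0, ps_1 = remove_border_points(pixels, pixels.clone(), intrinsics, intrinsics)

    # Overlap between the cloud and a slightly perturbed copy of itself.
    common = find_common_points(pcl_world.numpy(), pcl_world.numpy() + 1e-3)

    print(f"cloud: {tuple(pcl_world.shape)}, in-bounds projections: {ps_0.shape[0]}, "
          f"common points: {common.shape[0]} / {pcl_world.shape[0]}")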