Description

gt_labels: When I inspect gt_labels, most of its values are zero, which seems wrong for a scene flow task.
Position Mismatch: Adding gt_labels to pos1 does not reproduce pos2, which points to a problem with either the data or my understanding of how the labels should be applied.
Context
Task: I am working on a radar scene flow project. gt_labels is supposed to encode the per-point transformation from pos1 to pos2 as scene flow.
Expected Outcome: The labels should represent scene flow vectors such that adding them to pos1 accurately transforms it to pos2.
Steps to Reproduce
Data Loading: Using a specific dataset for radar scene flow, I extract gt_labels and positional data.
Computation: I compute pos1_with_flow as pos1 + gt_labels.transpose(1, 0) and check whether it equals pos2.
Verification: I use np.allclose() to check whether the transformed points match the expected pos2 (a minimal version of this check is sketched below).
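For reference, here is the check as a standalone sketch. The sample path is hypothetical, the JSON keys follow my loader below, and the transpose is only applied if the labels turn out to be stored as (3, N):

```python
import numpy as np
import ujson

# Hypothetical sample path -- any preprocessed flow sample should work here.
sample_path = '/path/to/flow_smp/train/clip_0/00000_smp.json'
with open(sample_path, 'rb') as fp:
    data = ujson.load(fp)

pos1 = np.array(data["pc1"], dtype=np.float32)[:, 0:3]  # (N, 3)
pos2 = np.array(data["pc2"], dtype=np.float32)[:, 0:3]  # (M, 3)
gt_labels = np.array(data["gt_labels"], dtype=np.float32)

# Bring the labels to (N, 3) if they are stored as (3, N).
flow = gt_labels.T if gt_labels.shape[0] == 3 and gt_labels.shape[-1] != 3 else gt_labels

zero_rows = int(np.count_nonzero(np.linalg.norm(flow, axis=1) < 1e-6))
print(f"zero flow vectors: {zero_rows}/{flow.shape[0]}")

# This comparison is only meaningful if pc1 and pc2 are point-wise
# correspondences of equal length, which raw radar frames usually are not.
if pos1.shape == pos2.shape:
    print("pos1 + flow == pos2:", np.allclose(pos1 + flow, pos2, atol=1e-3))
```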
My dataset loader:

```python
import os
import numpy as np
from torch.utils.data import Dataset
import ujson
from scipy.spatial.transform import Rotation as R


class vodDataset(Dataset):
    def __init__(self, args, root='/mnt/12T/fangqiang/vod_unanno/flow_smp/', partition='train', textio=None):
        self.npoints = args.num_points
        self.textio = textio
        self.calib_path = '/root/autodl-tmp/CMFlow/dataset/vod_radar_calib.txt'
        self.res = {'r_res': 0.2,                    # m
                    'theta_res': 1.5 * np.pi / 180,  # radian
                    'phi_res': 1.5 * np.pi / 180     # radian
                    }
        self.read_calib_files()
        self.eval = args.eval
        self.partition = partition
        self.root = os.path.join(root, self.partition)
        self.interval = 0.10
        self.clips = sorted(os.listdir(self.root), key=lambda x: int(x.split("_")[1]))
        self.samples = []
        self.clips_info = []
        for clip in self.clips:
            clip_path = os.path.join(self.root, clip)
            samples = sorted(os.listdir(clip_path), key=lambda x: int(x.split("/")[-1].split("_")[0]))
            for idx in range(len(samples)):
                samples[idx] = os.path.join(clip_path, samples[idx])
            if self.eval:
                self.clips_info.append({'clip_name': clip,
                                        'index': [len(self.samples), len(self.samples) + len(samples)]
                                        })
            if clip[:5] == 'delft':
                self.samples.extend(samples)
        self.textio.cprint(self.partition + ' : ' + str(len(self.samples)))
    def __getitem__(self, index):
        sample = self.samples[index]
        with open(sample, 'rb') as fp:
            data = ujson.load(fp)
        data_1 = np.array(data["pc1"]).astype('float32')
        data_2 = np.array(data["pc2"]).astype('float32')
        # read input data and features
        interval = self.interval
        pos_1 = data_1[:, 0:3]
        pos_2 = data_2[:, 0:3]
        vel_1 = data_1[:, 3]
        vel_2 = data_2[:, 3]
        rcs_1 = data_1[:, 4]
        rcs_2 = data_2[:, 4]
        feature_1 = data_1[:, [4, 3, 3]]  # keep the original feature layout
        feature_2 = data_2[:, [4, 3, 3]]  # keep the original feature layout
        # GT labels and pseudo FG labels (from lidar)
        gt_labels = np.array(data["gt_labels"]).astype('float32')
        print(gt_labels)
        pse_labels = np.array(data["pse_labels"]).astype('float32')
        # GT mask or pseudo FG mask (from lidar)
        gt_mask = np.array(data["gt_mask"])
        pse_mask = np.array(data["pse_mask"])
        # use GT labels and motion seg. mask for evaluation on val and test set
        if self.partition in ['test', 'val', 'train_anno']:
            labels = gt_labels
            mask = gt_mask
            opt_flow = np.zeros((pos_1.shape[0], 2)).astype('float32')
            radar_u = np.zeros(pos_1.shape[0]).astype('float32')
            radar_v = np.zeros(pos_1.shape[0]).astype('float32')
        # use pseudo FG flow labels and FG mask as supervision signals for training
        else:
            labels = pse_labels
            mask = pse_mask
            opt_info = data["opt_info"]
            opt_flow = np.array(opt_info["opt_flow"]).astype('float32')
            radar_u = np.array(opt_info["radar_u"]).astype('float32')
            radar_v = np.array(opt_info["radar_v"]).astype('float32')
        # static points transformation from frame 1 to frame 2
        trans = np.linalg.inv(np.array(data["trans"])).astype('float32')
        # defined up front so the eval path below does not hit a NameError
        supplement_1 = 0
        supplement_2 = 0
        ## downsample to npoints to enable fast batch processing (not in test)
        if not self.eval:
            npts_1 = pos_1.shape[0]
            npts_2 = pos_2.shape[0]
            if npts_1 < self.npoints:
                supplement_1 = self.npoints - npts_1
                sample_idx1 = np.arange(0, npts_1)
                sample_idx1 = np.append(sample_idx1, np.random.choice(npts_1, self.npoints - npts_1, replace=True))
            else:
                supplement_1 = 0  # no supplementation needed
                sample_idx1 = np.random.choice(npts_1, self.npoints, replace=False)
            if npts_2 < self.npoints:
                supplement_2 = self.npoints - npts_2
                sample_idx2 = np.arange(0, npts_2)
                sample_idx2 = np.append(sample_idx2, np.random.choice(npts_2, self.npoints - npts_2, replace=True))
            else:
                supplement_2 = 0  # no supplementation needed
                sample_idx2 = np.random.choice(npts_2, self.npoints, replace=False)
            # # Output or log the number of supplemented points
            # print(f"Supplemented {supplement_1} points for pos_1")
            # print(f"Supplemented {supplement_2} points for pos_2")
            pos_1 = pos_1[sample_idx1, :]
            pos_2 = pos_2[sample_idx2, :]
            feature_1 = feature_1[sample_idx1, :]
            feature_2 = feature_2[sample_idx2, :]
            radar_u = radar_u[sample_idx1]
            radar_v = radar_v[sample_idx1]
            opt_flow = opt_flow[sample_idx1, :]
            vel_1 = vel_1[sample_idx1]  # update radial velocity
            vel_2 = vel_2[sample_idx2]  # update radial velocity
            labels = labels[sample_idx1, :]
            mask = mask[sample_idx1]
        # # reshape to match the desired output shape
        # print(f"shape of feature_1 is {feature_1.shape}")
        # print(f"shape of feature_2 is {feature_2.shape}")
        # feature_1 = feature_1.reshape(self.npoints, 3)
        # feature_2 = feature_2.reshape(self.npoints, 3)
        # vel_1 = vel_1.reshape(self.npoints)
        # vel_2 = vel_2.reshape(self.npoints)
        return pos_1, pos_2, feature_1, feature_2, vel_1, vel_2, trans, labels, mask, interval, radar_u, radar_v, opt_flow, supplement_1, supplement_2
    def read_calib_files(self):
        with open(self.calib_path, "r") as f:
            lines = f.readlines()
            intrinsic = np.array(lines[2].strip().split(' ')[1:], dtype=np.float32).reshape(3, 4)  # intrinsics
            extrinsic = np.array(lines[5].strip().split(' ')[1:], dtype=np.float32).reshape(3, 4)  # extrinsics
            extrinsic = np.concatenate([extrinsic, [[0, 0, 0, 1]]], axis=0)
        self.camera_projection_matrix = intrinsic
        self.t_camera_radar = extrinsic

    def __len__(self):
        return len(self.samples)
```
My inspection script:

```python
import torch
import numpy as np
import pandas as pd
import os
from torch.utils.data import DataLoader
import plotly.graph_objects as go
from dataset import *
from utils import *
import argparse


class IOStream:
    def __init__(self, path):
        self.f = open(path, 'a')

    def cprint(self, text):
        print(text)
        self.f.write(text + '\n')
        self.f.flush()

    def close(self):
        self.f.close()
def _init_(args):
    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')
    if not os.path.exists('checkpoints/' + args.exp_name):
        os.makedirs('checkpoints/' + args.exp_name)
    if not os.path.exists('checkpoints/' + args.exp_name + '/' + 'models'):
        os.makedirs('checkpoints/' + args.exp_name + '/' + 'models')
    if not os.path.exists('checkpoints/' + args.exp_name + '/' + 'loss_train'):
        os.makedirs('checkpoints/' + args.exp_name + '/' + 'loss_train')
    os.system('cp main.py checkpoints/' + args.exp_name + '/' + 'main.py.backup')
    os.system('cp configs.yaml checkpoints/' + args.exp_name + '/' + 'configs.yaml.backup')
def extract_data_info(data):
    pos_1, pos_2, feature_1, feature_2, vel_1, vel_2, trans, labels, mask, interval, radar_u, radar_v, opt_flow, supplement_1, supplement_2 = data
    pos_1 = pos_1.permute(0, 2, 1)  # [batch_size, 3, npoints]
    pos_2 = pos_2.permute(0, 2, 1)  # [batch_size, 3, npoints]
    feature_1 = feature_1.permute(0, 2, 1)  # [batch_size, 3, npoints]
    feature_2 = feature_2.permute(0, 2, 1)  # [batch_size, 3, npoints]
    pos_1 = pos_1.cuda().contiguous()
    pos_2 = pos_2.cuda().contiguous()
    feature_1 = feature_1.cuda().contiguous()
    feature_2 = feature_2.cuda().contiguous()
    vel_1 = vel_1.cuda().contiguous()
    vel_2 = vel_2.cuda().contiguous()
    radar_v = radar_v.cuda().contiguous()
    radar_u = radar_u.cuda().contiguous()
    opt_flow = opt_flow.cuda().contiguous()
    mask = mask.cuda().contiguous()
    trans = trans.cuda().contiguous()
    interval = interval.cuda().float()
    labels = labels.cuda().contiguous()  # [batch_size, npoints, 3]
    print("Label Shape:", labels.shape)  # verify label shape
    return pos_1, pos_2, feature_1, feature_2, vel_1, vel_2, trans, labels, mask, interval, radar_u, radar_v, opt_flow, supplement_1, supplement_2
def compute_kde(pos_data, feature_data, kernel_func, bandwidth, target_neighbors=8, radius=1.0):
    B, _, N = pos_data.shape
    if feature_data.dim() == 3:
        _, D, _ = feature_data.shape
        feature_data = feature_data.permute(0, 2, 1)  # [B, N, D]
    else:
        D = 1
        feature_data = feature_data.unsqueeze(-1)  # [B, N, 1]
    kde_values = torch.zeros((B, N), device=pos_data.device)
    counts_below = []
    counts_above = []
    total_neighbors = 0
    total_points = 0
    for b in range(B):
        for i in range(N):
            point_pos = pos_data[b, :, i]
            pos_diffs = pos_data[b] - point_pos.unsqueeze(1)
            in_radius_mask = (torch.abs(pos_diffs[0, :]) <= radius) & \
                             (torch.abs(pos_diffs[1, :]) <= radius) & \
                             (torch.abs(pos_diffs[2, :]) <= radius)
            M_p = in_radius_mask.sum().item()
            total_neighbors += M_p
            total_points += 1
            if M_p > 1:
                valid_feature_points = feature_data[b, in_radius_mask]
                feature_diffs = valid_feature_points - feature_data[b, i].unsqueeze(0)
                feature_distances = torch.norm(feature_diffs, dim=-1) if D > 1 else feature_diffs.abs()
                kernel_values = kernel_func(feature_distances, bandwidth)
                kde_value = kernel_values.sum() / 8  # hard-coded divisor; matches the target_neighbors default
                kde_values[b, i] = kde_value
                if M_p < target_neighbors:
                    counts_below.append(kde_value.item())
                else:
                    counts_above.append(kde_value.item())
            else:
                kde_values[b, i] = 0
    average_neighbors = total_neighbors / total_points if total_points > 0 else 0
    print(f"Average number of neighbors within radius {radius}: {average_neighbors}")
    return kde_values, counts_below, counts_above
def gaussian_kernel(distance, bandwidth):
    return torch.exp(-0.5 * (distance / bandwidth) ** 2) / (bandwidth * np.sqrt(2 * np.pi))


def save_kde_to_csv(kde_values, sample_idx, sample_type):
    kde_values_np = kde_values.cpu().numpy().flatten()
    df = pd.DataFrame({'KDE Values': kde_values_np})
    filename = f'kde_sample_{sample_idx}_{sample_type}.csv'
    df.to_csv(filename, index=False)
    print(f"KDE values saved to {filename}")
def plot_3d_points_with_scene_flow(pos1, pos2, labels, sample_idx):
    pos1_np = pos1.cpu().numpy().squeeze()      # [3, N]
    pos2_np = pos2.cpu().numpy().squeeze()      # [3, N]
    labels_np = labels.cpu().numpy().squeeze()  # [3, N]
    pos1_with_flow = pos1_np + labels_np  # both [3, N], so shapes match
    fig = go.Figure()
    # pos1 points
    fig.add_trace(go.Scatter3d(
        x=pos1_np[0], y=pos1_np[1], z=pos1_np[2],
        mode='markers',
        marker=dict(size=4, color='red', opacity=0.8),
        name='pos1'
    ))
    # pos2 points
    fig.add_trace(go.Scatter3d(
        x=pos2_np[0], y=pos2_np[1], z=pos2_np[2],
        mode='markers',
        marker=dict(size=4, color='blue', opacity=0.8),
        name='pos2'
    ))
    # pos1 + scene flow points
    fig.add_trace(go.Scatter3d(
        x=pos1_with_flow[0], y=pos1_with_flow[1], z=pos1_with_flow[2],
        mode='markers',
        marker=dict(size=4, color='green', opacity=0.8),
        name='pos1 + scene flow'
    ))
    # Add arrows to represent scene flow vectors.
    # labels_np is [3, N], so index as [component, point].
    for i in range(pos1_np.shape[1]):  # loop over each point
        fig.add_trace(go.Cone(
            x=[pos1_np[0, i]],
            y=[pos1_np[1, i]],
            z=[pos1_np[2, i]],
            u=[labels_np[0, i]],  # flow vector x-component
            v=[labels_np[1, i]],  # flow vector y-component
            w=[labels_np[2, i]],  # flow vector z-component
            showscale=False,
            colorscale=[[0, 'rgb(0,255,0)'], [1, 'rgb(0,255,0)']],
            sizemode='scaled',
            sizeref=0.2,
            name='Scene Flow'
        ))
    fig.update_layout(
        title=f'3D Scatter Plot with Scene Flow - Sample {sample_idx}',
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z'
        )
    )
    # Save the plot as an interactive HTML file
    filename = f'3d_plot_sample_{sample_idx}_scene_flow.html'
    fig.write_html(filename)
    print(f"Interactive 3D plot saved as {filename}")
def visualize_first_batch_kde(data_loader, kernel_func=gaussian_kernel, bandwidth=0.5, radius=1.0):
    with torch.no_grad():
        for batch_idx, data in enumerate(data_loader):
            if batch_idx == 0:
                pos1, pos2, feature1, feature2, vel1, vel2, _, labels, _, _, _, _, _, supplement_1, supplement_2 = extract_data_info(data)
                print(labels.shape)
                for sample_idx in range(pos1.size(0)):
                    pos1_sample = pos1[sample_idx].unsqueeze(0)
                    pos2_sample = pos2[sample_idx].unsqueeze(0)
                    labels_sample = labels[sample_idx].permute(1, 0).unsqueeze(0)  # [1, 3, npoints]
                    print(labels_sample)
                    feature1_sample = feature1[sample_idx].unsqueeze(0)
                    feature2_sample = feature2[sample_idx].unsqueeze(0)
                    # Compute KDE
                    kde_values_pos1, counts_below_pos1, counts_above_pos1 = compute_kde(pos1_sample, pos1_sample, kernel_func, bandwidth, radius=radius)
                    kde_values_pos2, counts_below_pos2, counts_above_pos2 = compute_kde(pos2_sample, pos2_sample, kernel_func, bandwidth, radius=radius)
                    kde_values_feature1, counts_below_feature1, counts_above_feature1 = compute_kde(pos1_sample, feature1_sample, kernel_func, bandwidth, radius=radius)
                    kde_values_feature2, counts_below_feature2, counts_above_feature2 = compute_kde(pos2_sample, feature2_sample, kernel_func, bandwidth, radius=radius)
                    kde_values_vel1, counts_below_vel1, counts_above_vel1 = compute_kde(pos1_sample, vel1[sample_idx].unsqueeze(0), kernel_func, bandwidth, radius=radius)
                    kde_values_vel2, counts_below_vel2, counts_above_vel2 = compute_kde(pos2_sample, vel2[sample_idx].unsqueeze(0), kernel_func, bandwidth, radius=radius)
                    save_kde_to_csv(kde_values_pos1, sample_idx, 'pos1')
                    save_kde_to_csv(kde_values_pos2, sample_idx, 'pos2')
                    save_kde_to_csv(kde_values_feature1, sample_idx, 'feature1')
                    save_kde_to_csv(kde_values_feature2, sample_idx, 'feature2')
                    save_kde_to_csv(kde_values_vel1, sample_idx, 'vel1')
                    save_kde_to_csv(kde_values_vel2, sample_idx, 'vel2')
                    # Save the 3D plot as an interactive HTML file
                    plot_3d_points_with_scene_flow(pos1_sample, pos2_sample, labels_sample, sample_idx)
                    print(f"Sample {sample_idx}: supplemented {supplement_1[sample_idx]} points for pos1")
                    print(f"Sample {sample_idx}: supplemented {supplement_2[sample_idx]} points for pos2")
                break  # only process the first batch
def main(io_args):
    args = parse_args_from_yaml("configs_pretrained.yaml")
    args.eval = io_args.eval
    args.dataset_path = io_args.dataset_path
    args.exp_name = io_args.exp_name
    args.model = io_args.model
    args.save_res = io_args.save_res
    args.dataset = io_args.dataset
    # Initialize checkpoints and logs
    _init_(args)
    log_path = os.path.join('checkpoints', args.exp_name, 'run.log')
    textio = IOStream(log_path)
    textio.cprint(f"Configuration: {args}")
    # Initialize dataset and data loader
    dataset_cls = dataset_dict[args.dataset]
    train_set = dataset_cls(args=args, root=args.dataset_path, partition='train', textio=textio)
    train_loader = DataLoader(train_set, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True, drop_last=True)
    # Set bandwidth and radius for visualization
    bandwidth = 0.05  # adjust according to data characteristics
    radius = 1.5      # neighborhood radius
    visualize_first_batch_kde(train_loader, bandwidth=bandwidth, radius=radius)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Radar Scene Flow')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--vis', action='store_true')
    parser.add_argument('--save_res', action='store_true')
    parser.add_argument('--dataset_path', type=str, default='/root/autodl-tmp/CMFlow/preprocessed_data_com/flow_smp')
    parser.add_argument('--exp_name', type=str, default='test')
    parser.add_argument('--model', type=str, default='cmflow')
    parser.add_argument('--dataset', type=str, default='vodDataset')
    args = parser.parse_args()
    main(args)
```
Troubleshooting Steps Taken
Data Inspection: Verified the data loading and parsing process to ensure that gt_labels are being read correctly from the dataset.
Debugging: Added debug prints (sketched below) to check the shapes and values of gt_labels, pos1, and pos2.
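Concretely, the debug prints are along these lines, placed in __getitem__ right after gt_labels is read (the 1e-6 zero threshold is my arbitrary choice):

```python
# Inside vodDataset.__getitem__, right after gt_labels is loaded:
flow_mag = np.linalg.norm(gt_labels, axis=-1)
nonzero = int(np.count_nonzero(flow_mag > 1e-6))
print("gt_labels:", gt_labels.shape, "pos_1:", pos_1.shape, "pos_2:", pos_2.shape)
print(f"non-zero flow vectors: {nonzero}/{gt_labels.shape[0]} "
      f"({100.0 * nonzero / max(gt_labels.shape[0], 1):.1f}%), "
      f"max flow magnitude: {flow_mag.max():.3f} m")
```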
My Questions
Why are gt_labels mostly zeros? Could there be an issue with how the data is generated or preprocessed?
Why doesn’t pos1 + label equal pos2? Could this be a problem with my understanding of how scene flow labels are applied, or is it an issue with the dataset?
What steps should I take to verify the correctness of gt_labels? Are there common practices to debug or validate these types of data? (One consistency check I was considering is sketched below.)
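For the last question, this is the kind of check I had in mind, as a sketch only: it assumes that trans maps static frame-1 points into frame 2 (as the comment in my loader suggests) and that mask == 0 marks static points, neither of which I have confirmed:

```python
import numpy as np

def check_static_flow(pos_1, labels, mask, trans, atol=0.05):
    """Sketch: for points assumed static (mask == 0 -- an assumption, the mask
    polarity may be inverted), pushing pos_1 through the 4x4 rigid transform
    `trans` should agree with pos_1 + labels."""
    static = (mask == 0)
    pts = pos_1[static]  # (S, 3)
    if pts.shape[0] == 0:
        print("no static points under this mask assumption")
        return True
    pts_h = np.concatenate([pts, np.ones((pts.shape[0], 1), dtype=pts.dtype)], axis=1)
    rigid = (trans @ pts_h.T).T[:, :3]  # ego-motion prediction for static points
    flowed = pts + labels[static]       # label-based prediction
    err = np.linalg.norm(rigid - flowed, axis=1)
    print(f"{pts.shape[0]} static points, mean |rigid - flowed| = {err.mean():.4f} m")
    return bool(np.all(err < atol))
```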
Request for Assistance
Insights: Looking for insights into why gt_labels might be mostly zero and how to address this.
Suggestions: Any suggestions on verifying and validating the correctness of scene flow labels.
Code Corrections: If there are mistakes in my current approach, I'd appreciate guidance on correcting them.
Attachments
I've attached a 3D plot generated using Plotly to visualize pos1, pos2, and the scene flow:
Red Points: Represent pos1.
Blue Points: Represent pos2.
Green Arrows: Represent the scene flow vectors from pos1 to pos2.