
Commit 88b8694

[Feature] Support PGD and multi-view FCOS3D++ on Waymo (#2835)
Co-authored-by: JingweiZhang12 <[email protected]>
Co-authored-by: sjh <sunjiahao1999>
Parent: 2dad86c

16 files changed (+909, -127 lines)
New file: Waymo camera-only dataset settings (FOV-image-based loading)
@@ -0,0 +1,184 @@
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# infer the backend from the prefix (LMDB and Memcached are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'

# Method 2: use backend_args (file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        # ratio_range=(1., 1.),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]
# Construct a pipeline for data and GT loading in the show function;
# keep its loading consistent with test_pipeline (e.g. the same file client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
        # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # load one frame every three frames
        load_interval=3,
        backend_args=backend_args))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
        # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='fov_image_based',
        load_eval_anns=False,
        backend_args=backend_args))

test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
        # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='fov_image_based',
        backend_args=backend_args))

val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
    metric='LET_mAP',
    load_type='fov_image_based',
    result_prefix='./pgd_fov_pred')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
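
A base dataset config like this is meant to be inherited via _base_ and can be loaded and checked with mmengine before any training run. A minimal sketch, assuming mmdetection3d and mmengine are installed; the config path below is hypothetical, since the diff view does not show the new file's actual name:

from mmengine.config import Config

# Hypothetical path: substitute the actual file added by this commit.
cfg = Config.fromfile('configs/_base_/datasets/waymo_fov_mono3d.py')

# Module-level variables defined above become attributes of the config.
assert cfg.train_dataloader.dataset.load_interval == 3  # one frame in three
assert cfg.val_evaluator.waymo_bin_file.endswith('fov_gt.bin')
print(cfg.train_dataloader.batch_size)  # 3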
New file: Waymo camera-only dataset settings (multi-view image-based loading)
@@ -0,0 +1,191 @@
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# infer the backend from the prefix (LMDB and Memcached are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'

# Method 2: use backend_args (file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        # ratio_range=(1., 1.),
        ratio_range=(0.95, 1.05),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]
# Construct a pipeline for data and GT loading in the show function;
# keep its loading consistent with test_pipeline (e.g. the same file client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
        # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # load one frame every three frames
        load_interval=3,
        backend_args=backend_args))

val_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
        # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # load_eval_anns=False,
        backend_args=backend_args))

test_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
        # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        load_eval_anns=False,
        backend_args=backend_args))

val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    metric='LET_mAP',
    load_type='mv_image_based',
    result_prefix='./pgd_mv_pred',
    nms_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=500,
        nms_thr=0.05,
        score_thr=0.001,
        min_bbox_size=0,
        max_per_frame=100))
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
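
Because load_type='mv_image_based' treats each camera image as a separate sample, the same physical object can be predicted by several overlapping cameras, so the evaluator carries an nms_cfg to deduplicate per-camera detections at the frame level before LET_mAP is computed. Below is a minimal sketch of only the score_thr filter and the max_per_frame cap, with a hypothetical helper name; the rotated NMS itself (use_rotate_nms, nms_thr) is left to the actual WaymoMetric implementation:

def filter_and_cap(boxes, scores, score_thr=0.001, max_per_frame=100):
    """Drop boxes scoring at or below score_thr, then keep the top
    max_per_frame boxes ranked by score (hypothetical helper)."""
    kept = sorted(
        (pair for pair in zip(scores, boxes) if pair[0] > score_thr),
        key=lambda pair: pair[0],
        reverse=True)[:max_per_frame]
    return [box for _, box in kept], [score for score, _ in kept]

# Three overlapping cameras report the same pedestrian at different scores;
# the 0.0005 detection falls below score_thr and is dropped:
boxes, scores = filter_and_cap(['cam0', 'cam1', 'cam2'], [0.9, 0.0005, 0.4])
print(boxes, scores)  # ['cam0', 'cam2'] [0.9, 0.4]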
