2727import rfdetr .datasets .transforms as T
2828
2929
def compute_multi_scale_scales(resolution, expanded_scales=False, patch_size=16, num_windows=4):
    """Return the candidate image sizes used for multi-scale resizing.

    Every returned size is a multiple of ``patch_size * num_windows`` so that
    it is compatible with both patching and windowing.

    Args:
        resolution: Base image size. It is floor-divided by
            ``patch_size * num_windows`` to find the base multiple, so a
            resolution that is not an exact multiple is rounded *down*.
        expanded_scales: When true, use offsets -5..5 around the base
            multiple instead of the default -3..4.
        patch_size: Patch size in pixels; must be positive.
        num_windows: Window count factor; must be positive.

    Returns:
        A list of sizes in ascending order. Sizes smaller than
        ``2 * patch_size * num_windows`` are dropped.

    Raises:
        ValueError: If ``patch_size`` or ``num_windows`` is not positive.
    """
    if patch_size <= 0 or num_windows <= 0:
        raise ValueError(
            f"patch_size and num_windows must be positive, "
            f"got patch_size={patch_size}, num_windows={num_windows}"
        )

    # Smallest step between two valid sizes: one extra patch per window.
    stride = patch_size * num_windows
    base_num_patches_per_window = resolution // stride

    # The default range is intentionally asymmetric (-3..4); the expanded
    # range is symmetric (-5..5).
    offsets = range(-5, 6) if expanded_scales else range(-3, 5)

    # Enforce a minimum image size of two strides.
    min_scale = stride * 2
    return [
        scale
        for scale in ((base_num_patches_per_window + offset) * stride for offset in offsets)
        if scale >= min_scale
    ]
4938
5039
@@ -107,7 +96,7 @@ def __call__(self, image, target):
10796 return image , target
10897
10998
110- def make_coco_transforms (image_set , resolution , multi_scale = False , expanded_scales = False ):
99+ def make_coco_transforms (image_set , resolution , multi_scale = False , expanded_scales = False , skip_random_resize = False , patch_size = 16 , num_windows = 4 ):
111100
112101 normalize = T .Compose ([
113102 T .ToTensor (),
@@ -117,7 +106,9 @@ def make_coco_transforms(image_set, resolution, multi_scale=False, expanded_scal
117106 scales = [resolution ]
118107 if multi_scale :
119108 # scales = [448, 512, 576, 640, 704, 768, 832, 896]
120- scales = compute_multi_scale_scales (resolution , expanded_scales )
109+ scales = compute_multi_scale_scales (resolution , expanded_scales , patch_size , num_windows )
110+ if skip_random_resize :
111+ scales = [scales [- 1 ]]
121112 print (scales )
122113
123114 if image_set == 'train' :
@@ -148,7 +139,7 @@ def make_coco_transforms(image_set, resolution, multi_scale=False, expanded_scal
148139 raise ValueError (f'unknown { image_set } ' )
149140
150141
151- def make_coco_transforms_square_div_64 (image_set , resolution , multi_scale = False , expanded_scales = False ):
142+ def make_coco_transforms_square_div_64 (image_set , resolution , multi_scale = False , expanded_scales = False , skip_random_resize = False , patch_size = 16 , num_windows = 4 ):
152143 """
153144 """
154145
@@ -161,7 +152,9 @@ def make_coco_transforms_square_div_64(image_set, resolution, multi_scale=False,
161152 scales = [resolution ]
162153 if multi_scale :
163154 # scales = [448, 512, 576, 640, 704, 768, 832, 896]
164- scales = compute_multi_scale_scales (resolution , expanded_scales )
155+ scales = compute_multi_scale_scales (resolution , expanded_scales , patch_size , num_windows )
156+ if skip_random_resize :
157+ scales = [scales [- 1 ]]
165158 print (scales )
166159
167160 if image_set == 'train' :
@@ -220,9 +213,25 @@ def build(image_set, args, resolution):
220213
221214
222215 if square_resize_div_64 :
223- dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms_square_div_64 (image_set , resolution , multi_scale = args .multi_scale , expanded_scales = args .expanded_scales ))
216+ dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms_square_div_64 (
217+ image_set ,
218+ resolution ,
219+ multi_scale = args .multi_scale ,
220+ expanded_scales = args .expanded_scales ,
221+ skip_random_resize = not args .do_random_resize_via_padding ,
222+ patch_size = args .patch_size ,
223+ num_windows = args .num_windows
224+ ))
224225 else :
225- dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms (image_set , resolution , multi_scale = args .multi_scale , expanded_scales = args .expanded_scales ))
226+ dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms (
227+ image_set ,
228+ resolution ,
229+ multi_scale = args .multi_scale ,
230+ expanded_scales = args .expanded_scales ,
231+ skip_random_resize = not args .do_random_resize_via_padding ,
232+ patch_size = args .patch_size ,
233+ num_windows = args .num_windows
234+ ))
226235 return dataset
227236
228237def build_roboflow (image_set , args , resolution ):
@@ -249,7 +258,23 @@ def build_roboflow(image_set, args, resolution):
249258
250259
251260 if square_resize_div_64 :
252- dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms_square_div_64 (image_set , resolution , multi_scale = args .multi_scale ))
261+ dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms_square_div_64 (
262+ image_set ,
263+ resolution ,
264+ multi_scale = args .multi_scale ,
265+ expanded_scales = args .expanded_scales ,
266+ skip_random_resize = not args .do_random_resize_via_padding ,
267+ patch_size = args .patch_size ,
268+ num_windows = args .num_windows
269+ ))
253270 else :
254- dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms (image_set , resolution , multi_scale = args .multi_scale ))
271+ dataset = CocoDetection (img_folder , ann_file , transforms = make_coco_transforms (
272+ image_set ,
273+ resolution ,
274+ multi_scale = args .multi_scale ,
275+ expanded_scales = args .expanded_scales ,
276+ skip_random_resize = not args .do_random_resize_via_padding ,
277+ patch_size = args .patch_size ,
278+ num_windows = args .num_windows
279+ ))
255280 return dataset
0 commit comments