
Commit 8ab66db

Release 1.0.1 (#101)
* add swin transformer
* add beit
* add dvae dalle_vae
* add layer decay
* add device
1 parent 19dfdc7 commit 8ab66db

File tree

86 files changed: +3417 −390 lines

Large commits have some content hidden by default (the first two new files below appear without their file paths).

+123
@@ -0,0 +1,123 @@
epochs: 100
output_dir: output_dir
seed: 0
device: gpu

model:
  name: BEiTFTWrapper
  architecture:
    name: VisionTransformerForFinetune
    img_size: 224
    embed_dim: 768
    patch_size: 16
    depth: 12
    num_heads: 12
    mlp_ratio: 4
    qkv_bias: True
    drop_path_rate: 0.1
    init_values: 0.1
    use_abs_pos_emb: False
    use_rel_pos_bias: True
  head:
    name: BEiTFTHead
    num_classes: 1000
    in_channels: 768

dataloader:
  train:
    loader:
      num_workers: 8
      use_shared_memory: True
    sampler:
      batch_size: 128
      shuffle: True
      drop_last: True
    dataset:
      name: ImageNet
      dataroot: data/ILSVRC2012/train/
      return_label: True
      transforms:
        - name: RandomResizedCrop
          size: 224
          scale: [0.08, 1.]
          interpolation: 'bicubic'
        - name: RandomHorizontalFlip
        - name: AutoAugment
          config_str: 'rand-m9-mstd0.5-inc1'
          interpolation: 'bicubic'
          img_size: 224
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
        - name: Transpose
        - name: NormalizeImage
          scale: 1.0/255.0
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
        - name: RandomErasing
          prob: 0.25
          mode: 'pixel'
          max_count: 1
      batch_transforms:
        - name: Mixup
          mixup_alpha: 0.8
          prob: 1.
          switch_prob: 0.5
          mode: 'batch'
          cutmix_alpha: 1.0
  val:
    loader:
      num_workers: 8
      use_shared_memory: True
    sampler:
      batch_size: 64
      shuffle: false
      drop_last: false
    dataset:
      name: ImageNet
      dataroot: data/ILSVRC2012/val
      return_label: True
      transforms:
        - name: Resize
          size: 256
          interpolation: 'bicubic'
        - name: CenterCrop
          size: 224
        - name: Transpose
        - name: Normalize
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]

lr_scheduler:
  name: LinearWarmup
  learning_rate:
    name: CosineAnnealingDecay
    learning_rate: 4e-3
    T_max: 100
    eta_min: 1e-6
  warmup_steps: 20
  start_lr: 0
  end_lr: 4e-3

optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.05
  epsilon: 1e-8
  exclude_from_weight_decay: ["pos_embed","cls_token",".bias","norm","gamma"]
  layer_decay: 0.65

log_config:
  name: LogHook
  interval: 10

checkpoint:
  name: CheckpointHook
  by_epoch: true
  interval: 1

custom_config:
  - name: EvaluateHook

vdl_config:
  name: VisualHook
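The optimizer section above pairs `exclude_from_weight_decay` with `layer_decay: 0.65`, i.e. layer-wise learning-rate decay during fine-tuning. Below is a minimal sketch of how such per-layer scales are commonly computed for a 12-block ViT; the parameter grouping rule and the `param_groups_with_layer_decay` helper are illustrative assumptions, not this repository's implementation.

```python
# Minimal sketch of layer-wise learning-rate decay as commonly used for BEiT
# fine-tuning. The grouping rule (embeddings -> layer 0, block i -> layer i+1,
# everything else -> head layer) is an assumption for illustration only.
def param_groups_with_layer_decay(named_params, depth=12, base_lr=4e-3, layer_decay=0.65):
    num_layers = depth + 2  # embeddings + transformer blocks + head
    groups = []
    for name, param in named_params:
        if name.startswith(("patch_embed", "pos_embed", "cls_token")):
            layer_id = 0
        elif name.startswith("blocks."):
            layer_id = int(name.split(".")[1]) + 1
        else:
            layer_id = num_layers - 1  # classification head
        scale = layer_decay ** (num_layers - 1 - layer_id)
        groups.append({"params": [param], "lr": base_lr * scale})
    return groups
```

With depth 12 and a decay of 0.65, the embedding parameters end up at roughly 0.65^13 ≈ 0.004 of the head's learning rate.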

+104
@@ -0,0 +1,104 @@
epochs: 800
output_dir: output_dir
seed: 0
device: gpu

model:
  name: BEiTPTWrapper
  architecture:
    name: VisionTransformerForMaskedImageModeling
    img_size: 224
    embed_dim: 768
    patch_size: 16
    depth: 12
    num_heads: 12
    mlp_ratio: 4
    use_abs_pos_emb: False
    use_rel_pos_bias: False
    use_shared_rel_pos_bias: True
    init_values: 0.1
    drop_path_rate: 0.1
  head:
    name: BEiTPTHead
    num_classes: 1000
    in_channels: 768
  d_vae:
    name: dall-e
    weight_path: 'dvae/'
    image_size: 112

dataloader:
  train:
    loader:
      num_workers: 0
      use_shared_memory: False
    sampler:
      batch_size: 128
      shuffle: True
      drop_last: True
    dataset:
      name: BEiT_ImageNet
      dataroot: data/ILSVRC2012/train/
      common_transforms:
        - name: ToRGB
        - name: ColorJitter
          brightness: 0.4
          contrast: 0.4
          saturation: 0.4
          hue: 0.4
        - name: RandomHorizontalFlip
        - name: RandomResizedCropAndInterpolationWithTwoPic
          size: 224
          second_size: 112
          interpolation: 'bicubic'
          second_interpolation: 'lanczos'
      patch_transforms:
        - name: Transpose
        - name: NormalizeImage
          scale: 1.0/255.0
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      visual_token_transforms:
        - name: Transpose
        - name: VisualTokenMap
          mode: 'map_pixels'
          scale: 255
      masking_generator:
        input_size: 14
        num_masking_patches: 75
        max_num_patches: None
        min_num_patches: 16

lr_scheduler:
  name: LinearWarmup
  learning_rate:
    name: CosineAnnealingDecay
    learning_rate: 3e-3
    T_max: 800
    eta_min: 1e-5
  warmup_steps: 10
  start_lr: 0
  end_lr: 3e-3

optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.05
  epsilon: 1e-8
  exclude_from_weight_decay: ["pos_embed","cls_token",".bias","norm","gamma"]
  grad_clip:
    name: global_norm
    value: 3.0

log_config:
  name: LogHook
  interval: 1

checkpoint:
  name: CheckpointHook
  by_epoch: True
  interval: 1

vdl_config:
  name: VisualHook
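The `masking_generator` block above configures BEiT-style block-wise masking over the 14×14 patch grid: 75 of the 196 patches (roughly 38%) are masked, in blocks of at least 16 patches. The snippet below is a simplified, self-contained sketch of that idea; `random_block_mask` is a hypothetical stand-in, not the project's actual masking generator.

```python
import random

# Illustrative block-wise masking on an input_size x input_size patch grid,
# mirroring the masking_generator settings above (75 masked patches, blocks
# of at least 16 patches). Simplified stand-in, not the repository's code.
def random_block_mask(input_size=14, num_masking_patches=75, min_num_patches=16):
    mask = [[0] * input_size for _ in range(input_size)]
    masked = 0
    while masked < num_masking_patches:
        # Aim for a block of roughly min_num_patches cells (or the remainder).
        target = min(min_num_patches, num_masking_patches - masked)
        h = random.randint(1, input_size)
        w = max(1, min(input_size, (target + h - 1) // h))
        top = random.randint(0, input_size - h)
        left = random.randint(0, input_size - w)
        for i in range(top, top + h):
            for j in range(left, left + w):
                if mask[i][j] == 0 and masked < num_masking_patches:
                    mask[i][j] = 1
                    masked += 1
    return mask
```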

configs/byol/byol_clas_r50.yaml

+10 −4
@@ -1,5 +1,7 @@
 epochs: 100
 output_dir: output_dir
+seed: 0
+device: gpu
 
 model:
   name: ByolClassification
@@ -16,7 +18,9 @@ model:
 
 dataloader:
   train:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: true
@@ -30,11 +34,13 @@ dataloader:
       - name: RandomHorizontalFlip
       - name: Resize
         size: [224,224]
-        interpolation: bicubic
+        interpolation: bicubic
       - name: ByolNormalize
       - name: Clip
   val:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: false
@@ -49,7 +55,7 @@ dataloader:
       - name: CenterCrop
         size: 224
       - name: ByolNormalize
-      - name: Clip
+      - name: Clip
 
 lr_scheduler:
   name: ByolLRScheduler
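The recurring change in these configs nests `num_workers` under a new `loader:` block together with `use_shared_memory`. Below is a minimal sketch of how such a block could be forwarded to `paddle.io.DataLoader` (whose `num_workers` and `use_shared_memory` arguments these keys mirror); the `build_dataloader` helper and the dict-style config access are assumptions for illustration, not the project's actual builder.

```python
import paddle

# Hypothetical helper showing how a parsed `train:` config section could map
# onto paddle.io.DataLoader keyword arguments.
def build_dataloader(dataset, cfg):
    loader_cfg = cfg.get("loader", {})    # e.g. {"num_workers": 8, "use_shared_memory": True}
    sampler_cfg = cfg.get("sampler", {})  # e.g. {"batch_size": 128, "shuffle": True}
    return paddle.io.DataLoader(
        dataset,
        batch_size=sampler_cfg.get("batch_size", 1),
        shuffle=sampler_cfg.get("shuffle", False),
        drop_last=sampler_cfg.get("drop_last", False),
        num_workers=loader_cfg.get("num_workers", 0),
        use_shared_memory=loader_cfg.get("use_shared_memory", True),
    )
```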

configs/byol/byol_r50_IM.yaml

+7 −2
@@ -1,8 +1,11 @@
 epochs: 300
 use_byol_iters: True
 total_images: 1281167
-global_batch_size: 4096 # 128 * 4 * 8
+global_batch_size: 4096
 output_dir: output_dir
+seed: 0
+device: gpu
+
 model:
   name: BYOL
   backbone:
@@ -33,7 +36,9 @@ model:
 
 dataloader:
   train:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: True
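For context on the removed comment: with a per-device `batch_size` of 128, `global_batch_size: 4096` works out to 4096 / 128 = 32 devices, matching the `128 * 4 * 8` breakdown in the deleted inline comment (presumably 4 nodes of 8 GPUs each).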

configs/cait/cait_m36_384.yaml

+5 −1
@@ -1,5 +1,7 @@
 epochs: 300
 output_dir: output_dir
+seed: 0
+device: gpu
 
 model:
   name: CaitWrapper
@@ -18,7 +20,9 @@ model:
 
 dataloader:
   train:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: true

configs/cait/cait_m48_448.yaml

+5 −1
@@ -1,5 +1,7 @@
 epochs: 300
 output_dir: output_dir
+seed: 0
+device: gpu
 
 model:
   name: CaitWrapper
@@ -18,7 +20,9 @@ model:
 
 dataloader:
   train:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: true

configs/cait/cait_s24_224.yaml

+5 −1
@@ -1,5 +1,7 @@
 epochs: 300
 output_dir: output_dir
+seed: 0
+device: gpu
 
 model:
   name: CaitWrapper
@@ -18,7 +20,9 @@ model:
 
 dataloader:
   train:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: true

configs/cait/cait_s24_384.yaml

+5 −1
@@ -1,5 +1,7 @@
 epochs: 300
 output_dir: output_dir
+seed: 0
+device: gpu
 
 model:
   name: CaitWrapper
@@ -18,7 +20,9 @@ model:
 
 dataloader:
   train:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: true

configs/cait/cait_s36_384.yaml

+5 −1
@@ -1,5 +1,7 @@
 epochs: 300
 output_dir: output_dir
+seed: 0
+device: gpu
 
 model:
   name: CaitWrapper
@@ -18,7 +20,9 @@ model:
 
 dataloader:
   train:
-    num_workers: 8
+    loader:
+      num_workers: 8
+      use_shared_memory: True
     sampler:
       batch_size: 128
       shuffle: true

0 commit comments
