-
Notifications
You must be signed in to change notification settings - Fork 62
Expand file tree
/
Copy pathcrnn_resnet34.yaml
More file actions
187 lines (173 loc) · 5.78 KB
/
crnn_resnet34.yaml
File metadata and controls
187 lines (173 loc) · 5.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# Run-wide settings.
system:
  # 0 for graph mode, 1 for pynative mode in MindSpore
  mode: 0
  distribute: True
  amp_level: 'O3'
  seed: 42
  log_interval: 100
  val_while_train: True
  drop_overflow_update: False
# Shared values. Each one is anchored here and reused elsewhere in this file
# through the matching alias (*character_dict_path, *num_classes, ...).
common:
  # No dictionary path is set: the value is null. The commented-out path is
  # kept for reference.
  # NOTE(review): presumably the consumer falls back to a built-in dictionary
  # when this is null - confirm.
  character_dict_path: &character_dict_path null  # mindocr/utils/dict/en_dict.txt
  # num_chars_in_dict + 1
  # TODO: retrieve it from dict or check correctness
  num_classes: &num_classes 37
  max_text_len: &max_text_len 24
  infer_mode: &infer_mode False
  use_space_char: &use_space_char False
  batch_size: &batch_size 64
# Network definition: backbone -> neck -> head.
model:
  type: rec
  transform: null
  backbone:
    name: rec_resnet34
    pretrained: False
  neck:
    name: RNNEncoder
    hidden_size: 256
  head:
    name: CTCHead
    weight_init: crnn_customised
    bias_init: crnn_customised
    # Alias of common.num_classes.
    out_channels: *num_classes
# Decoding of network output; dictionary settings are aliases of the values
# anchored under `common`.
postprocess:
  name: RecCTCLabelDecode
  character_dict_path: *character_dict_path
  use_space_char: *use_space_char
# Evaluation metric; `main_indicator` names the metric entry selected as the
# primary one.
metric:
  name: RecMetric
  main_indicator: acc
  character_dict_path: *character_dict_path
  ignore_space: True
  print_flag: False
# Training loss.
loss:
  name: CTCLoss
  # TODO: retrieve from the network output shape.
  pred_seq_len: 25
  # This value should be smaller than pred_seq_len
  # (currently 24 via *max_text_len vs. 25).
  max_label_len: *max_text_len
  batch_size: *batch_size
# Learning-rate schedule.
scheduler:
  scheduler: warmup_cosine_decay
  min_lr: 0.0
  lr: 0.0005
  # warmup_epochs + decay_epochs equals num_epochs here (1 + 29 = 30).
  num_epochs: 30
  warmup_epochs: 1
  decay_epochs: 29
# Optimizer settings.
optimizer:
  opt: adamw
  filter_bias_and_bn: True
  # NOTE(review): momentum and nesterov are set alongside `opt: adamw` -
  # confirm whether the optimizer factory uses or ignores them for this choice.
  momentum: 0.95
  weight_decay: 0.0001
  nesterov: False
# Loss scaling: a static scaler with a fixed scale value.
loss_scaler:
  type: static
  loss_scale: 512
# Training stage: checkpoint location, dataset with its transform pipeline,
# and data-loader settings.
train:
  ckpt_save_dir: './tmp_rec'
  # let CTCLoss cast internally
  pred_cast_fp32: False
  dataset_sink_mode: False
  dataset:
    type: LMDBDataset
    # Optional, if set, dataset_root will be used as a prefix for data_dir
    dataset_root: path/to/data_lmdb_release/
    data_dir: training/
    # label_file:  # not required when using LMDBDataset
    sample_ratio: 1.0
    shuffle: True
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - RecCTCLabelEncode:
          max_text_len: *max_text_len
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          lower: True
      - RecResizeNormImg:
          # H, W
          image_shape: [32, 100]
          infer_mode: *infer_mode
          character_dict_path: *character_dict_path
          # aspect ratio will be preserved if true.
          padding: False
          norm_before_pad: False
      - ToCHWImage:
    # The order of the dataloader list, matching the network input and the
    # input labels for the loss function, and optional data for
    # debug/visualize.
    # Columns currently left out of the list: 'length', 'img_path'.
    output_columns: ['image', 'text_seq']
    # input indices for network forward func in output_columns
    net_input_column_index: [0]
    # input indices marked as label
    label_column_index: [1]
    # keys_for_loss: 4  # num labels for loss func
  loader:
    # TODO: tbc
    shuffle: True
    batch_size: *batch_size
    drop_remainder: True
    max_rowsize: 12
    num_workers: 8
# Evaluation stage: checkpoint to load, dataset with its transform pipeline,
# and data-loader settings.
eval:
  ckpt_load_path: ./tmp_rec/best.ckpt
  dataset_sink_mode: False
  dataset:
    type: LMDBDataset
    dataset_root: path/to/data_lmdb_release/
    data_dir: validation/
    # label_file:  # not required when using LMDBDataset
    sample_ratio: 1.0
    shuffle: False
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: False
      - RecCTCLabelEncode:
          max_text_len: *max_text_len
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          lower: True
      - RecResizeNormForInfer:
          target_height: 32
          target_width: 100
          keep_ratio: False
          padding: False
          norm_before_pad: False
      - ToCHWImage:
    # The order of the dataloader list, matching the network input and the
    # input labels for the loss function, and optional data for
    # debug/visualize.
    # TODO: return text string padding w/ fixed length, and a scalar to
    # indicate the length.
    output_columns: ['image', 'text_padded', 'text_length']
    # input indices for network forward func in output_columns
    net_input_column_index: [0]
    # input indices marked as label
    label_column_index: [1, 2]
  loader:
    # TODO: tbc
    shuffle: False
    # Literal value, not the *batch_size alias; it currently equals the value
    # anchored under `common` (64).
    batch_size: 64
    drop_remainder: False
    max_rowsize: 12
    num_workers: 8
# Prediction stage: checkpoint to load, font used for visualisation, dataset
# with its transform pipeline, and data-loader settings.
predict:
  ckpt_load_path: ./tmp_rec/best.ckpt
  vis_font_path: tools/utils/simfang.ttf
  dataset_sink_mode: False
  dataset:
    type: PredictDataset
    dataset_root: path/to/dataset_root
    data_dir: predict_result/crop
    # label_files:
    # NOTE(review): the original note here read "not required when using
    # LMDBDataset", but this section uses PredictDataset - confirm whether
    # label files apply at all.
    sample_ratio: 1.0
    shuffle: False
    transform_pipeline:
      # Images are decoded as BGR here; NormalizeImage below sets
      # bgr_to_rgb: True.
      - DecodeImage:
          img_mode: BGR
          to_float32: False
      # Label encoding is disabled for prediction:
      # - RecCTCLabelEncode:
      #     max_text_len: *max_text_len
      #     character_dict_path: *character_dict_path
      #     use_space_char: *use_space_char
      #     lower: True
      # Different from paddle (paddle converts the image from HWC to CHW and
      # rescales to [-1, 1] after resize).
      - RecResizeImg:
          # H, W
          image_shape: [32, 100]
          infer_mode: *infer_mode
          character_dict_path: *character_dict_path
          # aspect ratio will be preserved if true.
          padding: False
      # Different from paddle (paddle wrongly normalizes the BGR image with
      # RGB mean/std from ImageNet for det, and simply rescales to [-1, 1]
      # in rec).
      - NormalizeImage:
          bgr_to_rgb: True
          is_hwc: True
          mean: [127.0, 127.0, 127.0]
          std: [127.0, 127.0, 127.0]
      - ToCHWImage:
    # The order of the dataloader list, matching the network input and the
    # input labels for the loss function, and optional data for
    # debug/visualize.
    output_columns: ['img_path', 'image', 'raw_img_shape']
  loader:
    # TODO: tbc
    shuffle: False
    batch_size: 1
    drop_remainder: True
    max_rowsize: 12
    num_workers: 8