# The full set of possible hyperparameters for the attentional seq2seq model.
# Most of the hyperparameters take their default values and do not need to be
# specified explicitly. The config here results in the same model as
# `config_model.py`.
num_units = 256
beam_width = 10
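# Note: `beam_width` is not consumed by the module configs below; it is
# typically passed to beam-search decoding at inference time by the training
# script (an assumption about how this config is used, e.g. via
# `tx.modules.beam_search_decode` in Texar-TF).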
# --------------------- Embedder --------------------- #
embedder = {
    'dim': num_units,
    'initializer': {
        'type': 'random_uniform_initializer',
        'kwargs': {
            'minval': -0.1,
            'maxval': 0.1,
            'seed': None
        },
    },
    'regularizer': {
        'type': 'L1L2',
        'kwargs': {
            'l1': 0,
            'l2': 0
        }
    },
    'dropout_rate': 0,
    'dropout_strategy': 'element',
    'trainable': True,
    'name': 'word_embedder'
}
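# A minimal usage sketch (an assumption about the training script, not part of
# this config; assumes `import texar as tx` for Texar-TF and a `vocab_size`
# taken from the data vocabulary):
#   embedder_module = tx.modules.WordEmbedder(
#       vocab_size=vocab_size, hparams=embedder)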
# --------------------- Encoder --------------------- #
encoder = {
    'rnn_cell_fw': {
        'type': 'LSTMCell',
        'kwargs': {
            'num_units': num_units,
            'forget_bias': 1.0,
            'activation': None,
            # Other arguments go here for tf.nn.rnn_cell.LSTMCell
            # ...
        },
        'num_layers': 1,
        'dropout': {
            'input_keep_prob': 1.0,
            'output_keep_prob': 1.0,
            'state_keep_prob': 1.0,
            'variational_recurrent': False,
            'input_size': [],
        },
        'residual': False,
        'highway': False,
    },
    'rnn_cell_bw': {
        # The same possible hyperparameters as with 'rnn_cell_fw'
        # ...
    },
    'rnn_cell_share_config': True,
    'output_layer_fw': {
        'num_layers': 0,
        'layer_size': 128,
        'activation': 'identity',
        'final_layer_activation': None,
        'other_dense_kwargs': None,
        'dropout_layer_ids': [],
        'dropout_rate': 0.5,
        'variational_dropout': False
    },
    'output_layer_bw': {
        # The same possible hyperparameters as with 'output_layer_fw'
        # ...
    },
    'output_layer_share_config': True,
    'name': 'bidirectional_rnn_encoder'
}
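# A minimal usage sketch (again an assumption about the training script; the
# `batch` dict is a hypothetical example from the data iterator):
#   encoder_module = tx.modules.BidirectionalRNNEncoder(hparams=encoder)
#   enc_outputs, _ = encoder_module(
#       inputs=embedder_module(batch['source_text_ids']),
#       sequence_length=batch['source_length'])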
# --------------------- Decoder --------------------- #
decoder = {
    'rnn_cell': {
        'type': 'LSTMCell',
        'kwargs': {
            'num_units': num_units,
            'forget_bias': 1.0,
            'activation': None,
            # Other arguments go here for tf.nn.rnn_cell.LSTMCell
            # ...
        },
        'num_layers': 1,
        'dropout': {
            'input_keep_prob': 1.0,
            'output_keep_prob': 1.0,
            'state_keep_prob': 1.0,
            'variational_recurrent': False,
            'input_size': [],
        },
        'residual': False,
        'highway': False,
    },
    'attention': {
        'type': 'LuongAttention',
        'kwargs': {
            'num_units': num_units,
            'scale': False,
            'probability_fn': None,
            'score_mask_value': None,
            # Other arguments go here for tf.contrib.seq2seq.LuongAttention
            # ...
        },
        'attention_layer_size': num_units,
        'alignment_history': False,
        'output_attention': True,
    },
    'helper_train': {
        'type': 'TrainingHelper',
        'kwargs': {
            # Arguments go here for tf.contrib.seq2seq.TrainingHelper
        }
    },
    'helper_infer': {
        # The same possible hyperparameters as with 'helper_train'
        # ...
    },
    'max_decoding_length_train': None,
    'max_decoding_length_infer': None,
    'name': 'attention_rnn_decoder'
}
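# A minimal usage sketch (an assumption about the training script; the decoder
# typically attends over the concatenated bidirectional encoder outputs, and
# `target_vocab_size` is a hypothetical value from the target vocabulary):
#   decoder_module = tx.modules.AttentionRNNDecoder(
#       memory=tf.concat(enc_outputs, axis=2),
#       memory_sequence_length=batch['source_length'],
#       vocab_size=target_vocab_size,
#       hparams=decoder)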
# --------------------- Optimization --------------------- #
opt = {
    'optimizer': {
        'type': 'AdamOptimizer',
        'kwargs': {
            'learning_rate': 0.001,
            # Other keyword arguments for the optimizer class
        },
    },
    'learning_rate_decay': {
        # Hyperparameters of learning rate decay
    },
    'gradient_clip': {
        # Hyperparameters of gradient clipping
    },
    'gradient_noise_scale': None,
    'name': None
}
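# A minimal usage sketch (an assumption about the training script): the `opt`
# dict is typically passed to Texar's training-op builder, e.g.
#   train_op = tx.core.get_train_op(mle_loss, hparams=opt)
# where `mle_loss` is a hypothetical sequence cross-entropy loss computed from
# the decoder outputs.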