1
- use crate :: { CPUActivation , Activation , CPUInit , CPURegularizer , Init , LSTMLayer , Tensors , LayerNorm } ;
2
- use core:: f32;
3
- use ndarray:: {
4
- concatenate, s, Array2 , Array3 , ArrayD , Axis , Dimension , Ix2 , Ix3 , IxDyn ,
1
+ use crate :: {
2
+ Activation , CPUActivation , CPUInit , CPURegularizer , Init , LSTMLayer , LayerNorm , Tensors ,
5
3
} ;
4
+ use core:: f32;
5
+ use ndarray:: { concatenate, s, Array2 , Array3 , ArrayD , Axis , Dimension , Ix2 , Ix3 , IxDyn } ;
6
6
use std:: ops:: AddAssign ;
7
7
/// Indices
8
8
/// 0 - Input Gate
@@ -30,12 +30,18 @@ pub struct LSTMCPULayer {
30
30
pub l_w_hh : Array3 < f32 > ,
31
31
pub l_biases : Array2 < f32 > ,
32
32
33
- pub h : Array3 < f32 > ,
34
- pub c : Array3 < f32 > ,
33
+ pub i_t : Array3 < f32 > ,
34
+ pub f_t : Array3 < f32 > ,
35
+ pub o_t : Array3 < f32 > ,
36
+ pub g_t : Array3 < f32 > ,
37
+
38
+ pub h_prev : Array2 < f32 > ,
39
+ pub c_prev : Array2 < f32 > ,
35
40
36
41
pub regularizer : CPURegularizer ,
37
42
}
38
43
44
+ #[ allow( unused_mut) ]
39
45
impl LSTMCPULayer {
40
46
pub fn new ( config : LSTMLayer , size : IxDyn , _tensors : Option < Tensors > ) -> Self {
41
47
let return_sequences = config. return_sequences . unwrap_or ( false ) ;
@@ -68,14 +74,20 @@ impl LSTMCPULayer {
68
74
l_w_ih : Array3 :: zeros ( weight_size) ,
69
75
l_w_hh : Array3 :: zeros ( ( 4 , config. size , config. size ) ) ,
70
76
l_biases : Array2 :: zeros ( ( 4 , config. size ) ) ,
71
- h : Array3 :: zeros ( ( size[ 1 ] , size[ 0 ] , config. size ) ) ,
72
- c : Array3 :: zeros ( ( size[ 1 ] , size[ 0 ] , config. size ) ) ,
77
+ i_t : Array3 :: zeros ( ( size[ 1 ] , size[ 0 ] , config. size ) ) ,
78
+ f_t : Array3 :: zeros ( ( size[ 1 ] , size[ 0 ] , config. size ) ) ,
79
+ o_t : Array3 :: zeros ( ( size[ 1 ] , size[ 0 ] , config. size ) ) ,
80
+ g_t : Array3 :: zeros ( ( size[ 1 ] , size[ 0 ] , config. size ) ) ,
81
+ h_prev : Array2 :: zeros ( ( size[ 0 ] , config. size ) ) ,
82
+ c_prev : Array2 :: zeros ( ( size[ 0 ] , config. size ) ) ,
73
83
regularizer : CPURegularizer :: from (
74
84
config. c . unwrap_or ( 0.0 ) ,
75
85
config. l1_ratio . unwrap_or ( 1.0 ) ,
76
86
) ,
77
87
78
- activation_h : CPUActivation :: from ( config. recurrent_activation . unwrap_or ( Activation :: Sigmoid ) ) ,
88
+ activation_h : CPUActivation :: from (
89
+ config. recurrent_activation . unwrap_or ( Activation :: Sigmoid ) ,
90
+ ) ,
79
91
activation_o : CPUActivation :: from ( config. activation . unwrap_or ( Activation :: Tanh ) ) ,
80
92
}
81
93
}
@@ -92,10 +104,13 @@ impl LSTMCPULayer {
92
104
pub fn forward_propagate ( & mut self , inputs : ArrayD < f32 > ) -> ArrayD < f32 > {
93
105
self . inputs = inputs. into_dimensionality :: < Ix3 > ( ) . unwrap ( ) ;
94
106
let output_size = self . w_ih . shape ( ) [ 2 ] ;
95
- self . h = Array3 :: zeros ( ( self . inputs . shape ( ) [ 1 ] , self . inputs . shape ( ) [ 0 ] , output_size) ) ;
96
- self . c = Array3 :: zeros ( ( self . inputs . shape ( ) [ 1 ] , self . inputs . shape ( ) [ 0 ] , output_size) ) ;
97
- let mut h_t = self . h . index_axis ( Axis ( 0 ) , 0 ) . clone ( ) . to_owned ( ) ;
98
- let mut c_t = self . c . index_axis ( Axis ( 0 ) , 0 ) . clone ( ) . to_owned ( ) ;
107
+ let mut h_t = Array2 :: zeros ( ( self . inputs . shape ( ) [ 0 ] , output_size) ) ;
108
+ let mut c_t = Array2 :: zeros ( h_t. raw_dim ( ) ) ;
109
+
110
+ self . i_t = Array3 :: zeros ( ( self . inputs . shape ( ) [ 1 ] , self . inputs . shape ( ) [ 0 ] , output_size) ) ;
111
+ self . f_t = Array3 :: zeros ( self . i_t . raw_dim ( ) ) ;
112
+ self . o_t = Array3 :: zeros ( self . i_t . raw_dim ( ) ) ;
113
+ self . g_t = Array3 :: zeros ( self . i_t . raw_dim ( ) ) ;
99
114
100
115
let mut outputs = Array3 :: zeros ( if self . return_sequences {
101
116
( self . inputs . shape ( ) [ 0 ] , self . inputs . shape ( ) [ 1 ] , output_size)
@@ -109,7 +124,8 @@ impl LSTMCPULayer {
109
124
. slice ( s ! [ .., t, ..] )
110
125
. to_owned ( )
111
126
. into_dimensionality :: < Ix2 > ( )
112
- . unwrap ( ) ; // Current input
127
+ . unwrap ( ) ;
128
+
113
129
let i_t = ( & x_t. dot ( & self . w_ih . index_axis ( Axis ( 0 ) , 0 ) )
114
130
+ & h_t. dot ( & self . w_hh . index_axis ( Axis ( 0 ) , 0 ) )
115
131
+ & self . biases . index_axis ( Axis ( 0 ) , 0 ) )
@@ -127,56 +143,40 @@ impl LSTMCPULayer {
127
143
+ & self . biases . index_axis ( Axis ( 0 ) , 3 ) )
128
144
. mapv ( |x| ( self . activation_o . activate ) ( & x) ) ;
129
145
146
+ self . i_t . index_axis_mut ( Axis ( 0 ) , t) . assign ( & i_t) ;
147
+ self . f_t . index_axis_mut ( Axis ( 0 ) , t) . assign ( & f_t) ;
148
+ self . o_t . index_axis_mut ( Axis ( 0 ) , t) . assign ( & o_t) ;
149
+ self . g_t . index_axis_mut ( Axis ( 0 ) , t) . assign ( & g_t) ;
150
+
130
151
c_t = & ( & c_t * & f_t) + & ( & g_t * & i_t) ;
131
152
h_t = & c_t. mapv ( |x| ( self . activation_o . activate ) ( & x) ) * & o_t;
132
153
133
- self . h . index_axis_mut ( Axis ( 0 ) , t) . assign ( & h_t) ;
134
- self . c . index_axis_mut ( Axis ( 0 ) , t) . assign ( & c_t) ;
135
-
136
154
if self . return_sequences {
137
155
outputs. slice_mut ( s ! [ .., t, ..] ) . assign ( & h_t) ;
138
156
}
139
157
}
158
+ self . h_prev = h_t. clone ( ) ;
159
+ self . c_prev = c_t. clone ( ) ;
140
160
141
161
if self . return_sequences {
142
162
outputs. into_dyn ( )
143
- }
144
- else {
163
+ } else {
145
164
h_t. into_dyn ( )
146
165
}
147
166
}
148
-
149
- fn split_gates (
150
- & self ,
151
- z : & Array2 < f32 > ,
152
- hidden_size : usize ,
153
- ) -> ( Array2 < f32 > , Array2 < f32 > , Array2 < f32 > , Array2 < f32 > ) {
154
- let i_t = z
155
- . slice ( ndarray:: s![ .., ..hidden_size] )
156
- . mapv ( |x| ( self . activation_h . activate ) ( & x) ) ;
157
- let f_t = z
158
- . slice ( ndarray:: s![ .., hidden_size..2 * hidden_size] )
159
- . mapv ( |x| ( self . activation_h . activate ) ( & x) ) ;
160
- let o_t = z
161
- . slice ( ndarray:: s![ .., 2 * hidden_size..3 * hidden_size] )
162
- . mapv ( |x| ( self . activation_h . activate ) ( & x) ) ;
163
- let g_t = z
164
- . slice ( ndarray:: s![ .., 3 * hidden_size..] )
165
- . mapv ( |x| ( self . activation_o . activate ) ( & x) ) ;
166
-
167
- ( i_t, f_t, o_t, g_t)
168
- }
169
167
/// Dispatches the backward pass according to the rank of `d_outputs`.
///
/// * rank 2 - the gradient is converted to `Ix2` and handed to
///   `backward_propagate_2d`.
/// * rank 3 - the gradient is converted to `Ix3` and handed to
///   `backward_propagate_3d`.
/// * any other rank - `d_outputs` is returned unchanged; no backward pass is run.
///
/// In the two handled cases the value returned by the helper is converted back
/// to a dynamic-dimensional array before being returned.
pub fn backward_propagate ( & mut self , d_outputs : ArrayD < f32 > ) -> ArrayD < f32 > {
170
168
// The number of axes of the incoming gradient selects the code path.
match d_outputs. shape ( ) . len ( ) {
171
169
2 => {
172
- let d_inputs = self . backward_propagate_2d ( d_outputs. into_dimensionality :: < Ix2 > ( ) . unwrap ( ) ) ;
170
+ let d_inputs =
171
+ self . backward_propagate_2d ( d_outputs. into_dimensionality :: < Ix2 > ( ) . unwrap ( ) ) ;
173
172
d_inputs. into_dyn ( )
174
173
}
175
174
3 => {
176
- let d_inputs = self . backward_propagate_3d ( d_outputs. into_dimensionality :: < Ix3 > ( ) . unwrap ( ) ) ;
175
+ let d_inputs =
176
+ self . backward_propagate_3d ( d_outputs. into_dimensionality :: < Ix3 > ( ) . unwrap ( ) ) ;
177
177
d_inputs. into_dyn ( )
178
178
}
179
- _ => d_outputs
179
+ _ => d_outputs,
180
180
}
181
181
}
182
182
pub fn backward_propagate_3d ( & mut self , d_outputs : Array3 < f32 > ) -> Array3 < f32 > {
@@ -189,8 +189,8 @@ impl LSTMCPULayer {
189
189
self . d_w_hh = Array3 :: zeros ( ( 4 , hidden_size, hidden_size) ) ;
190
190
self . d_biases = Array2 :: zeros ( ( 4 , hidden_size) ) ;
191
191
192
- let h_prev = self . h . index_axis ( Axis ( 0 ) , sequence_length - 1 ) ;
193
- let c_prev = self . c . index_axis ( Axis ( 0 ) , sequence_length - 1 ) ;
192
+ let h_prev = self . h_prev . clone ( ) ;
193
+ let c_prev = self . c_prev . clone ( ) ;
194
194
195
195
let mut d_inputs = Array3 :: < f32 > :: zeros ( ( batch_size, sequence_length, input_size) ) ;
196
196
@@ -224,27 +224,28 @@ impl LSTMCPULayer {
224
224
. into_dimensionality :: < Ix2 > ( )
225
225
. unwrap ( ) ;
226
226
227
- let d_h = d_outputs. slice ( s ! [ .., t, ..] ) . clone ( ) . to_owned ( ) . into_dimensionality :: < Ix2 > ( ) . unwrap ( ) ;
227
+ let d_h = d_outputs
228
+ . slice ( s ! [ .., t, ..] )
229
+ . clone ( )
230
+ . to_owned ( )
231
+ . into_dimensionality :: < Ix2 > ( )
232
+ . unwrap ( ) ;
228
233
229
234
d_h_prev = d_h_prev + d_h;
230
- // clip_gradients(&mut d_h_prev, 5f32);
231
235
232
- let gates = x_t . dot ( & w_ih )
233
- + h_prev . dot ( & w_hh )
234
- + & self . biases . to_shape ( 4 * hidden_size ) . unwrap ( ) ;
235
- let ( i_t , f_t , o_t , g_t) = self . split_gates ( & gates , hidden_size ) ;
236
+ let i_t = self . i_t . index_axis ( Axis ( 0 ) , t ) ;
237
+ let f_t = self . f_t . index_axis ( Axis ( 0 ) , t ) ;
238
+ let o_t = self . o_t . index_axis ( Axis ( 0 ) , t ) ;
239
+ let g_t = self . g_t . index_axis ( Axis ( 0 ) , t ) ;
236
240
237
241
let d_tanned_c = & d_h_prev * & o_t * c_prev. map ( |x| ( self . activation_o . activate ) ( & x) ) ;
238
242
let mut d_c_t = d_tanned_c + & d_c_prev;
239
- // clip_gradients(&mut d_c_t, 5f32);
240
243
241
244
let d_o_t = & d_h_prev * & c_prev. mapv ( |x| ( self . activation_o . activate ) ( & x) ) ;
242
245
let d_f_t = & d_c_t * & c_prev * & f_t. map ( |x| ( self . activation_h . prime ) ( x) ) ;
243
246
let d_i_t = & d_c_t * & g_t * & i_t. map ( |x| ( self . activation_h . prime ) ( x) ) ;
244
247
let d_g_t = & d_c_t * & i_t * & g_t. map ( |x| ( self . activation_o . prime ) ( x) ) ;
245
248
let d_gates = concatenate ! [ Axis ( 1 ) , d_i_t, d_f_t, d_o_t, d_g_t] ;
246
- // println!("ADD {:?}", concatenate![Axis(0), d_i_t, d_f_t, d_o_t, d_g_t].shape());
247
- // println!("DG {:?} {:?}", d_gates.shape(), &w_ih.shape());
248
249
d_inputs
249
250
. slice_mut ( s ! [ .., t, ..] )
250
251
. assign ( & d_gates. dot ( & w_ih. t ( ) ) ) ;
@@ -303,8 +304,8 @@ impl LSTMCPULayer {
303
304
self . d_w_hh = Array3 :: zeros ( ( 4 , hidden_size, hidden_size) ) ;
304
305
self . d_biases = Array2 :: zeros ( ( 4 , hidden_size) ) ;
305
306
306
- let h_prev = self . h . index_axis ( Axis ( 0 ) , sequence_length - 1 ) ;
307
- let c_prev = self . c . index_axis ( Axis ( 0 ) , sequence_length - 1 ) ;
307
+ let h_prev = self . h_prev . clone ( ) ;
308
+ let c_prev = self . c_prev . clone ( ) ;
308
309
309
310
let mut d_inputs = Array3 :: < f32 > :: zeros ( ( batch_size, sequence_length, input_size) ) ;
310
311
@@ -339,22 +340,21 @@ impl LSTMCPULayer {
339
340
. into_dimensionality :: < Ix2 > ( )
340
341
. unwrap ( ) ;
341
342
342
- let gates = x_t . dot ( & w_ih )
343
- + h_prev . dot ( & w_hh )
344
- + & self . biases . to_shape ( 4 * hidden_size ) . unwrap ( ) ;
345
- let ( i_t , f_t , o_t , g_t) = self . split_gates ( & gates , hidden_size ) ;
343
+ let i_t = self . i_t . index_axis ( Axis ( 0 ) , t ) ;
344
+ let f_t = self . f_t . index_axis ( Axis ( 0 ) , t ) ;
345
+ let o_t = self . o_t . index_axis ( Axis ( 0 ) , t ) ;
346
+ let g_t = self . g_t . index_axis ( Axis ( 0 ) , t ) ;
346
347
347
348
let d_tanned_c = & d_h_prev * & o_t * c_prev. map ( |x| ( self . activation_o . activate ) ( & x) ) ;
348
349
let mut d_c_t = d_tanned_c + & d_c_prev;
349
- // clip_gradients(&mut d_c_t, 5f32);
350
350
351
351
let d_o_t = & d_h_prev * & c_prev. mapv ( |x| ( self . activation_o . activate ) ( & x) ) ;
352
352
let d_f_t = & d_c_t * & c_prev * & f_t. map ( |x| ( self . activation_h . prime ) ( x) ) ;
353
353
let d_i_t = & d_c_t * & g_t * & i_t. map ( |x| ( self . activation_h . prime ) ( x) ) ;
354
354
let d_g_t = & d_c_t * & i_t * & g_t. map ( |x| ( self . activation_o . prime ) ( x) ) ;
355
355
356
356
let d_gates = concatenate ! [ Axis ( 1 ) , d_i_t, d_f_t, d_o_t, d_g_t] ;
357
- // println!("OT: {:?}\n\nFT {:?}", &d_h_prev, &d_c_prev);
357
+
358
358
d_inputs
359
359
. slice_mut ( s ! [ .., t, ..] )
360
360
. assign ( & d_gates. dot ( & w_ih. t ( ) ) ) ;
@@ -405,6 +405,7 @@ impl LSTMCPULayer {
405
405
}
406
406
}
407
407
408
+ #[ allow( dead_code) ]
408
409
fn clip_gradients ( grad : & mut Array2 < f32 > , threshold : f32 ) -> ( ) {
409
410
let norm = grad. mapv ( |x| x. powi ( 2 ) ) . sum ( ) . sqrt ( ) ;
410
411
if norm > threshold {
0 commit comments