
Commit 8e31438

feat: add embeddings, lstm, gelu (#68)
* add embeddings
* change import
* update flatten backprop
* feat: Add postprocessing (and fix WASM) (#65) (#66)
* add postprocessing for sign and step
* fix wasm
* update flatten
* allow custom mapping
* update matrix types
* add lstm
* try to fix lstm
* allow other activation
* add config for activation
* add GELU
* build wasm
* wasm
* last lstm test attempt
1 parent a811fd1 commit 8e31438

36 files changed, +1194 -272 lines changed

crates/core/src/cpu/activation.rs

+14
@@ -7,6 +7,9 @@ pub struct CPUActivation {
 
 type ActivationFn = fn(x: &f32) -> f32;
 
+const ROOT_2_BY_PI: f32 = 0.7978845608028654;
+const GELU_APPROX: f32 = 0.044715;
+
 impl CPUActivation {
     pub fn from(activation: Activation) -> Self {
         let (activate, prime): (ActivationFn, ActivationFn) = match activation {
@@ -15,6 +18,7 @@ impl CPUActivation {
             Activation::Linear => (linear, linear_prime),
             Activation::Relu => (relu, relu_prime),
             Activation::Relu6 => (relu6, relu6_prime),
+            Activation::Gelu => (gelu, gelu_prime),
             Activation::Selu => (selu, selu_prime),
             Activation::Sigmoid => (sigmoid, sigmoid_prime),
             Activation::Tanh => (tanh, tanh_prime),
@@ -75,6 +79,16 @@ fn relu_prime(x: &f32) -> f32 {
     return if *x > 0.0 { 1.0 } else { 0.0 };
 }
 
+fn gelu(x: &f32) -> f32 {
+    return (0.5 * x) * (1.0 + (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh());
+}
+
+fn gelu_prime(x: &f32) -> f32 {
+    let tanned = (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh();
+    return (0.5 * (1.0 + tanned))
+        + (0.5 * x * (1.0 - tanned.powi(2))) * ROOT_2_BY_PI * (1.0 + 3.0 * GELU_APPROX * x.powi(2));
+}
+
 fn relu6(x: &f32) -> f32 {
     return x.max(0.0).min(6.0);
 }
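For reference only (not part of the diff): gelu and gelu_prime implement the usual tanh approximation of GELU, with ROOT_2_BY_PI = sqrt(2/pi) ≈ 0.7978845608 and GELU_APPROX = 0.044715:

\[ \mathrm{GELU}(x) \approx \tfrac{1}{2}\,x\,\bigl(1 + \tanh\bigl(\sqrt{2/\pi}\,(x + 0.044715\,x^{3})\bigr)\bigr) \]

\[ \mathrm{GELU}'(x) \approx \tfrac{1}{2}(1 + t) + \tfrac{1}{2}\,x\,(1 - t^{2})\,\sqrt{2/\pi}\,\bigl(1 + 3 \cdot 0.044715\,x^{2}\bigr), \qquad t = \tanh\bigl(\sqrt{2/\pi}\,(x + 0.044715\,x^{3})\bigr) \]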

crates/core/src/cpu/backend.rs

+24 -4

@@ -11,6 +11,8 @@ use crate::{
     Pool2DCPULayer, PostProcessor, SoftmaxCPULayer, Tensor, Tensors, Timer,
 };
 
+use super::{EmbeddingCPULayer, LSTMCPULayer};
+
 pub struct Backend {
     pub silent: bool,
     pub config: BackendConfig,
@@ -71,18 +73,28 @@ impl Backend {
                     size = layer.output_size().to_vec();
                     layers.push(CPULayer::Dense(layer));
                 }
-                Layer::Flatten(config) => {
-                    let layer = FlattenCPULayer::new(config, IxDyn(&size));
+                Layer::Embedding(config) => {
+                    let layer = EmbeddingCPULayer::new(config, IxDyn(&size));
+                    size = layer.output_size().to_vec();
+                    layers.push(CPULayer::Embedding(layer));
+                }
+                Layer::Flatten => {
+                    let layer = FlattenCPULayer::new(IxDyn(&size));
                     size = layer.output_size().to_vec();
                     layers.push(CPULayer::Flatten(layer));
                 }
+                Layer::LSTM(config) => {
+                    let layer = LSTMCPULayer::new(config, IxDyn(&size), None);
+                    size = layer.output_size().to_vec();
+                    layers.push(CPULayer::LSTM(layer));
+                }
                 Layer::Pool2D(config) => {
                     let layer = Pool2DCPULayer::new(config, IxDyn(&size));
                     size = layer.output_size().to_vec();
                     layers.push(CPULayer::Pool2D(layer));
                 }
-                Layer::Softmax => {
-                    let layer = SoftmaxCPULayer::new(IxDyn(&size));
+                Layer::Softmax(config) => {
+                    let layer = SoftmaxCPULayer::new(config, IxDyn(&size));
                     layers.push(CPULayer::Softmax(layer));
                 }
             }
@@ -125,7 +137,10 @@ impl Backend {
                 }
             }
             None => {
+                // let mut i = 0;
                 for layer in &mut self.layers {
+                    // i += 1;
+                    // println!("\n\nLayer +{}: {:?}", i, &inputs);
                     inputs = layer.forward_propagate(inputs, training);
                 }
             }
@@ -138,9 +153,14 @@ impl Backend {
         outputs: ArrayViewD<'b, f32>,
         data: ArrayViewD<'b, f32>,
     ) -> ArrayD<f32> {
+        // println!("\n\nOutput: {:?}", &outputs);
        let mut d_outputs = (self.cost.prime)(outputs, data);
+        // println!("\n\nD Output: {:?}", &d_outputs);
+        // let mut i = 0;
        for layer in self.layers.iter_mut().rev() {
+            // i += 1;
            d_outputs = layer.backward_propagate(d_outputs);
+            // println!("\n\nLayer -{}: {:?}", i, &d_outputs);
        }
        d_outputs
    }

crates/core/src/cpu/layer_norm.rs

+45
@@ -0,0 +1,45 @@
+extern crate ndarray;
+use ndarray::{Array1, ArrayD, Axis};
+
+pub struct LayerNorm {
+    pub gamma: Array1<f32>,
+    pub beta: Array1<f32>,
+    pub epsilon: f32,
+}
+
+impl LayerNorm {
+    pub fn new(hidden_size: usize, epsilon: f32) -> Self {
+        LayerNorm {
+            gamma: Array1::ones(hidden_size),
+            beta: Array1::zeros(hidden_size),
+            epsilon,
+        }
+    }
+
+    pub fn forward(&self, input: ArrayD<f32>) -> ArrayD<f32> {
+        let shape = input.shape();
+        let last_axis = shape.len() - 1;
+
+        let mean = input.mean_axis(Axis(last_axis)).unwrap();
+        let variance = input.var_axis(Axis(last_axis), 0.0);
+
+        let mut normalized_input = input.clone();
+        normalized_input
+            .axis_iter_mut(Axis(last_axis))
+            .enumerate()
+            .for_each(|(i, mut row)| {
+                let mean_i = mean[i];
+                let var_i = variance[i].sqrt() + self.epsilon;
+                row -= mean_i;
+                row /= var_i;
+            });
+
+        normalized_input
+            .axis_iter_mut(Axis(last_axis))
+            .for_each(|mut item| {
+                let new = &item * &self.gamma + &self.beta;
+                item.assign(&new);
+            });
+        normalized_input
+    }
+}
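For orientation (not part of the commit): the forward pass here is the standard layer normalization over the last axis, with learned scale gamma and shift beta,

\[ \mathrm{LN}(x)_j = \gamma_j \,\frac{x_j - \mu}{\sigma + \epsilon} + \beta_j, \qquad \mu = \frac{1}{H}\sum_{k=1}^{H} x_k, \quad \sigma = \sqrt{\frac{1}{H}\sum_{k=1}^{H} (x_k - \mu)^{2}} \]

Note that this implementation adds epsilon to the standard deviation itself rather than to the variance under the square root.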

crates/core/src/cpu/layers/activation.rs

+9 -6

@@ -1,7 +1,7 @@
 use ndarray::{s, ArrayD, Dimension, IxDyn};
-use std::ops::{Div, Mul, Sub};
+use std::{f32::EPSILON, ops::{Div, Mul, Sub}};
 
-use crate::{ActivationLayer, CPUActivation};
+use crate::{ActivationLayer, CPUActivation, SoftmaxLayer};
 
 pub struct ActivationCPULayer {
     pub outputs: ArrayD<f32>,
@@ -45,11 +45,13 @@ impl ActivationCPULayer {
 
 pub struct SoftmaxCPULayer {
     pub outputs: ArrayD<f32>,
+    pub temperature: f32,
 }
 
 impl SoftmaxCPULayer {
-    pub fn new(size: IxDyn) -> Self {
+    pub fn new(config: SoftmaxLayer, size: IxDyn) -> Self {
         Self {
+            temperature: config.temperature.unwrap_or(1f32),
             outputs: ArrayD::zeros(size),
         }
     }
@@ -68,18 +70,19 @@ impl SoftmaxCPULayer {
         self.outputs = inputs.clone();
         let batches = self.outputs.dim()[0];
         for b in 0..batches {
-            let exp = inputs.slice(s![b, ..]).map(|x| x.exp());
+            let current_input = inputs.slice(s![b, ..]).map(|x| x / self.temperature);
+            let max = current_input.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
+            let exp = current_input.map(|x| (x - max).exp());
             self.outputs
                 .slice_mut(s![b, ..])
-                .assign(&exp.clone().div(exp.sum()));
+                .assign(&exp.clone().div(exp.sum() + EPSILON));
         }
         self.outputs.clone().into_dyn()
     }
 
     pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
         let batches = self.outputs.dim()[0];
         let array_size = self.outputs.dim().size() / batches;
-
         let mut d_inputs = ArrayD::zeros(self.outputs.dim());
         for b in 0..batches {
             for y in 0..array_size {
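As context (not part of the diff): with the new temperature field, the forward pass computes a temperature-scaled, max-shifted softmax per batch row,

\[ \mathrm{softmax}(x)_i = \frac{e^{x_i/T - m}}{\sum_j e^{x_j/T - m} + \varepsilon}, \qquad m = \max_j \frac{x_j}{T} \]

where T is config.temperature (default 1.0) and ε is f32::EPSILON; subtracting the row maximum keeps the exponentials from overflowing, and the ε in the denominator guards against division by zero.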

crates/core/src/cpu/layers/dropout.rs

+1 -1

@@ -30,7 +30,7 @@ impl Dropout1DCPULayer {
 
     pub fn forward_propagate(&mut self, inputs: ArrayD<f32>, training: bool) -> ArrayD<f32> {
         if training {
-            self.mask = ArrayD::random(self.mask.dim(), Uniform::new(0.0, 1.0))
+            self.mask = ArrayD::random(inputs.dim(), Uniform::new(0.0, 1.0))
                 .map(|x| (if x > &self.probability { 1.0 } else { 0.0 }));
             inputs.mul(&self.mask).mul(1.0 / 1.0 - self.probability)
         } else {
+68

@@ -0,0 +1,68 @@
+use std::ops::AddAssign;
+use ndarray::{Array2, ArrayD, Axis, Ix2, IxDyn};
+
+use crate::{CPUInit, CPURegularizer, EmbeddingLayer, Init};
+
+pub struct EmbeddingCPULayer {
+    pub input_size: IxDyn,
+    pub input_indices: Vec<usize>,
+    pub output_size: Vec<usize>,
+    pub vocab_size: usize,
+    pub embedding_size: usize,
+    pub embeddings: Array2<f32>,
+    pub d_embeddings: Array2<f32>,
+    // regularization
+    pub l_embeddings: Array2<f32>,
+
+    pub regularizer: CPURegularizer,
+}
+
+impl EmbeddingCPULayer {
+    pub fn new(config: EmbeddingLayer, size: IxDyn) -> Self {
+        let init = CPUInit::from(Init::Uniform);
+        let output_size = vec![size[0], size[1], config.embedding_size];
+        let embeddings = init.init(IxDyn(&[config.vocab_size, config.embedding_size]), 0, 0).into_dimensionality::<Ix2>().unwrap();
+        let d_embeddings = Array2::zeros((config.vocab_size, config.embedding_size));
+        Self {
+            input_size: size,
+            input_indices: vec![],
+            output_size,
+            vocab_size: config.vocab_size,
+            embedding_size: config.embedding_size,
+            embeddings,
+            d_embeddings,
+            l_embeddings: Array2::zeros((config.vocab_size, config.embedding_size)),
+            regularizer: CPURegularizer::from(config.c.unwrap_or(0.0), config.l1_ratio.unwrap_or(1.0))
+        }
+    }
+
+    pub fn output_size(&self) -> Vec<usize> {
+        self.output_size.clone()
+    }
+
+    pub fn reset(&mut self, batches: usize) {
+        self.output_size[0] = batches
+    }
+
+    pub fn forward_propagate(&mut self, inputs: ArrayD<f32>) -> ArrayD<f32> {
+        let input_indices: Vec<usize> = inputs.iter().map(|&x| x as usize).collect();
+        self.input_indices = input_indices.clone();
+        let embeddings = self.embeddings.select(Axis(0), input_indices.as_slice());
+        // let output_size = IxDyn(&self.output_size);
+        embeddings.into_shape_with_order(IxDyn(&[inputs.shape()[0], inputs.shape()[1], self.embedding_size])).unwrap()
+    }
+
+    pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
+        let indices = Array2::from_shape_vec(Ix2(d_outputs.shape()[0], self.input_size[1]), self.input_indices.clone());
+        self.d_embeddings = Array2::zeros((self.d_embeddings.shape()[0], self.d_embeddings.shape()[1]));
+        d_outputs.axis_iter(Axis(0)).zip(indices).for_each(|(rec, i)| {
+            rec.axis_iter(Axis(0)).zip(i).for_each(|(grad, idx)| {
+                self.d_embeddings.index_axis_mut(Axis(0), idx).add_assign(&grad);
+            });
+        });
+        self.l_embeddings = self.regularizer.coeff(&self.embeddings.clone().into_dyn()).into_dimensionality::<Ix2>().unwrap();
+        let mut input_size = self.input_size.clone();
+        input_size[0] = d_outputs.shape()[0];
+        ArrayD::from_shape_vec(input_size, self.input_indices.iter().map(|x| *x as f32).collect()).unwrap()
+    }
+}
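A minimal standalone sketch (toy values and a hypothetical main, not from the crate) of the row-gather that forward_propagate performs with ndarray's select:

use ndarray::{Array2, Axis};

fn main() {
    let (vocab_size, embedding_size) = (4, 3);
    // Toy embedding table: row i holds the value i in every column.
    let embeddings = Array2::from_shape_fn((vocab_size, embedding_size), |(i, _)| i as f32);
    // Token ids for one sequence; each id selects one row of the table.
    let input_indices = vec![2usize, 0, 3];
    let looked_up = embeddings.select(Axis(0), &input_indices);
    assert_eq!(looked_up.shape(), &[3, 3]);
    println!("{looked_up:?}");
}

In the layer itself the gathered rows are then reshaped to [batch, sequence, embedding_size], and backward_propagate scatters the incoming gradients back into d_embeddings row by row using the saved input_indices.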

crates/core/src/cpu/layers/flatten.rs

+8 -16

@@ -1,26 +1,15 @@
 use ndarray::{ArrayD, Dimension, IxDyn};
 
-use crate::FlattenLayer;
-
 pub struct FlattenCPULayer {
     pub input_size: IxDyn,
     pub output_size: Vec<usize>,
 }
 
 impl FlattenCPULayer {
-    pub fn new(config: FlattenLayer, size: IxDyn) -> Self {
-        let mut new_size = config.size.clone();
-        new_size.insert(0, size[0]);
-        let output_size = IxDyn(&new_size);
-        if output_size.size() != size.size() {
-            panic!(
-                "Shape {:#?} is incompatible with shape {:#?}",
-                output_size, size
-            )
-        }
+    pub fn new(size: IxDyn) -> Self {
         Self {
-            input_size: size,
-            output_size: new_size,
+            input_size: size.clone(),
+            output_size: vec![size[0], size.size() / size[0]],
         }
     }
 
@@ -33,11 +22,14 @@ impl FlattenCPULayer {
     }
 
     pub fn forward_propagate(&mut self, inputs: ArrayD<f32>) -> ArrayD<f32> {
-        let output_size = IxDyn(&self.output_size);
+        let output_size = IxDyn(&[inputs.shape()[0], self.output_size[1]]);
+        println!("O {:?} {:?}", inputs.shape(), self.output_size);
         inputs.into_shape_with_order(output_size).unwrap()
     }
 
     pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
-        d_outputs.into_shape_with_order(self.input_size.clone()).unwrap()
+        let mut current_size = self.input_size.clone();
+        current_size[0] = d_outputs.shape()[0];
+        d_outputs.to_shape(current_size).unwrap().to_owned()
     }
 }
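A minimal sketch (hypothetical shapes, not part of the commit) of the [batch, features] flattening the new constructor derives from the input shape:

use ndarray::{ArrayD, Dimension, IxDyn};

fn main() {
    let input = ArrayD::<f32>::zeros(IxDyn(&[2, 3, 4]));
    let size = input.raw_dim();
    // output_size = [batch, everything else], i.e. [2, 3 * 4] = [2, 12]
    let output_size = IxDyn(&[size[0], size.size() / size[0]]);
    let flattened = input.into_shape_with_order(output_size).unwrap();
    assert_eq!(flattened.shape(), &[2, 12]);
}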
