use std::ops::AddAssign;
use ndarray::{Array2, ArrayD, Axis, Ix2, IxDyn};

use crate::{CPUInit, CPURegularizer, EmbeddingLayer, Init};

/// CPU embedding layer: maps integer token indices to dense vectors of size
/// `embedding_size`, with one row of `embeddings` per vocabulary id.
pub struct EmbeddingCPULayer {
  pub input_size: IxDyn,
  pub input_indices: Vec<usize>,
  pub output_size: Vec<usize>,
  pub vocab_size: usize,
  pub embedding_size: usize,
  pub embeddings: Array2<f32>,
  /// Gradient accumulator for `embeddings`, filled during backpropagation.
  pub d_embeddings: Array2<f32>,
  // regularization
  pub l_embeddings: Array2<f32>,

  pub regularizer: CPURegularizer,
}

impl EmbeddingCPULayer {
  pub fn new(config: EmbeddingLayer, size: IxDyn) -> Self {
    let init = CPUInit::from(Init::Uniform);
    // Output shape is [batch, sequence_length, embedding_size].
    let output_size = vec![size[0], size[1], config.embedding_size];
    let embeddings = init
      .init(IxDyn(&[config.vocab_size, config.embedding_size]), 0, 0)
      .into_dimensionality::<Ix2>()
      .unwrap();
    let d_embeddings = Array2::zeros((config.vocab_size, config.embedding_size));
    Self {
      input_size: size,
      input_indices: vec![],
      output_size,
      vocab_size: config.vocab_size,
      embedding_size: config.embedding_size,
      embeddings,
      d_embeddings,
      l_embeddings: Array2::zeros((config.vocab_size, config.embedding_size)),
      regularizer: CPURegularizer::from(config.c.unwrap_or(0.0), config.l1_ratio.unwrap_or(1.0)),
    }
  }

  pub fn output_size(&self) -> Vec<usize> {
    self.output_size.clone()
  }

  pub fn reset(&mut self, batches: usize) {
    self.output_size[0] = batches;
  }

  /// Looks up the embedding row for every input index and reshapes the result
  /// to `[batch, sequence_length, embedding_size]`.
  pub fn forward_propagate(&mut self, inputs: ArrayD<f32>) -> ArrayD<f32> {
    let input_indices: Vec<usize> = inputs.iter().map(|&x| x as usize).collect();
    self.input_indices = input_indices.clone();
    let embeddings = self.embeddings.select(Axis(0), input_indices.as_slice());
    embeddings
      .into_shape_with_order(IxDyn(&[inputs.shape()[0], inputs.shape()[1], self.embedding_size]))
      .unwrap()
  }

  /// Scatters the output gradients back onto `d_embeddings`: each gradient row
  /// is added to the embedding row addressed by its input index. Returns the
  /// stored input indices (as f32) in the shape the previous layer produced.
  pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
    let indices = Array2::from_shape_vec(
      Ix2(d_outputs.shape()[0], self.input_size[1]),
      self.input_indices.clone(),
    ).unwrap();
    self.d_embeddings = Array2::zeros((self.d_embeddings.shape()[0], self.d_embeddings.shape()[1]));
    d_outputs
      .axis_iter(Axis(0))
      .zip(indices.axis_iter(Axis(0)))
      .for_each(|(rec, idx_row)| {
        rec.axis_iter(Axis(0)).zip(idx_row).for_each(|(grad, &idx)| {
          self.d_embeddings.index_axis_mut(Axis(0), idx).add_assign(&grad);
        });
      });
    self.l_embeddings = self.regularizer.coeff(&self.embeddings.clone().into_dyn())
      .into_dimensionality::<Ix2>().unwrap();
    let mut input_size = self.input_size.clone();
    input_size[0] = d_outputs.shape()[0];
    ArrayD::from_shape_vec(input_size, self.input_indices.iter().map(|x| *x as f32).collect()).unwrap()
  }
}
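
// A minimal usage sketch (not part of this commit), assuming `EmbeddingLayer`
// can be constructed directly with the fields read above (`vocab_size`,
// `embedding_size`, `c`, `l1_ratio`) and that token ids arrive as an f32
// ArrayD of shape [batch, sequence_length]:
//
//   let config = EmbeddingLayer { vocab_size: 100, embedding_size: 16, c: None, l1_ratio: None };
//   let mut layer = EmbeddingCPULayer::new(config, IxDyn(&[2, 8]));
//   let tokens = ArrayD::<f32>::zeros(IxDyn(&[2, 8]));              // token ids cast to f32
//   let out = layer.forward_propagate(tokens);                      // shape [2, 8, 16]
//   let d_in = layer.backward_propagate(ArrayD::zeros(out.dim()));  // fills layer.d_embeddings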