-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathlstm.h
More file actions
101 lines (86 loc) · 3.45 KB
/
lstm.h
File metadata and controls
101 lines (86 loc) · 3.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// lstm.h
// LSTM layer module
#ifndef TINYTENSOR_NN_LSTM_H_
#define TINYTENSOR_NN_LSTM_H_
#include <tt/device.h>
#include <tt/export.h>
#include <tt/nn/module.h>
#include <tt/scalar.h>
#include <tt/tensor.h>
#include <memory>
#include <optional>
#include <ostream>
#include <string>
namespace tinytensor::nn {
// Options for LSTM
struct TINYTENSOR_EXPORT LSTMOptions {
int num_layers = 1;
bool bias = true;
bool batch_first = false;
bool bidirectional = false;
};
// An LSTM layer
// See https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
class TINYTENSOR_EXPORT LSTM : public Module {
public:
// Input initial hidden and cell state
struct InitState {
Tensor h;
Tensor c;
};
// Output packed data from LSTM
struct Output {
Tensor output; // Output
Tensor h; // The final hidden state for each element in the batch
Tensor c; // The final cell state for each element in the batch
};
/**
* Construct an lstm layer
* @param input_size Number of input features
* @param hidden_size Number of features for hidden state
* @param options The LSTM options
* @param dtype The dtype of the weights
* @param device The device the weights should be initialized on
*/
LSTM(
int input_size,
int hidden_size,
const LSTMOptions &options = {},
ScalarType dtype = kDefaultFloat,
Device device = kCPU
);
/**
* Forward pass for LSTM
* @param input Tensor of shape (L, input_size) for unbatched input, (L, B, input_size) if batch_first=false,
* or (B, L, input_size) when batch_first=true, for L=length and B=batch_size
* @param init_state Optional initial hidden and cell state, initialized to zero if not provided. Shape is expected
* to be (D*num_layers, hidden_size) for unbatched input, or (D*num_layers, B, hidden_size) for batched input, for
* B=batch_size and D=2 if bidirectional, 1 otherwise for both h and c
* @return Output of LSTM, final hidden state and final cell state for each element in the sequence.
* For unbatched input, output has shape (L, D*hidden_size), (L, batch_size, D*hidden_size) if batch_first=false,
* or (batch_size, L, D*hidden_size) when batch_first=true
* For unbatched input, hidden and cell states have shape (D*num_layers, hidden_size),
* and (D*num_layers, batch_size, hidden_size) for batched input
*/
[[nodiscard]] auto forward(const Tensor &input, const InitState &init_state) const -> Output;
[[nodiscard]] auto forward(const Tensor &input) const -> Output;
void pretty_print(std::ostream &os) const override;
[[nodiscard]] auto name() const -> std::string override {
return "LSTM";
}
CheckedVec<std::shared_ptr<Tensor>> weights_ih;
CheckedVec<std::shared_ptr<Tensor>> weights_ih_reverse;
CheckedVec<std::shared_ptr<Tensor>> weights_hh;
CheckedVec<std::shared_ptr<Tensor>> weights_hh_reverse;
CheckedVec<std::shared_ptr<Tensor>> biases_ih;
CheckedVec<std::shared_ptr<Tensor>> biases_ih_reverse;
CheckedVec<std::shared_ptr<Tensor>> biases_hh;
CheckedVec<std::shared_ptr<Tensor>> biases_hh_reverse;
private:
[[nodiscard]] auto forward(const Tensor &input, const std::optional<InitState> &init_state) const -> Output;
int input_size_;
int hidden_size_;
LSTMOptions options_;
};
} // namespace tinytensor::nn
#endif // TINYTENSOR_NN_LSTM_H_