# ===----------------------------------------------------------------------=== #
#
# This file is Modular Inc proprietary.
#
# ===----------------------------------------------------------------------=== #
"""
Step 02: Feed-forward Network (MLP)
Implement the MLP used in each transformer block with GELU activation.
Tasks:
1. Import functional (as F), Tensor, Linear, and Module from MAX
2. Create c_fc linear layer (embedding to intermediate dimension)
3. Create c_proj linear layer (intermediate back to embedding dimension)
4. Apply c_fc transformation in forward pass
5. Apply GELU activation function
6. Apply c_proj transformation and return result
Run: pixi run s02
"""
# 1: Import the required modules from MAX
# TODO: Import functional module max.functional with the alias F
# https://docs.modular.com/max/api/python/functional/
# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor/
# TODO: Import Linear and Module from max.nn
# https://docs.modular.com/max/api/python/nn/
import max.functional as F
from max.nn import Linear, Module
from max.tensor import Tensor

from step_01 import GPT2Config


class GPT2MLP(Module):
"""Feed-forward network matching HuggingFace GPT-2 structure.
Args:
intermediate_size: Size of the intermediate layer.
config: GPT-2 configuration.
"""
def __init__(self, intermediate_size: int, config: GPT2Config) -> None:
super().__init__()
embed_dim = config.n_embd
# 2: Create the first linear layer (embedding to intermediate)
# TODO: Create self.c_fc as a Linear layer from embed_dim to intermediate_size with bias=True
# https://docs.modular.com/max/api/python/nn/Linear
# Hint: This is the expansion layer in the MLP
self.c_fc = None
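
        # A sketch of one possible answer, spelling out the TODO above
        # (the argument order and bias keyword follow the task hint):
        #   self.c_fc = Linear(embed_dim, intermediate_size, bias=True)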

        # 3: Create the second linear layer (intermediate back to embedding)
        # TODO: Create self.c_proj as a Linear layer from intermediate_size to embed_dim with bias=True
        # https://docs.modular.com/max/api/python/nn/Linear
        # Hint: This is the projection layer that brings us back to the embedding dimension
        self.c_proj = None
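
        # A sketch of one possible answer, mirroring c_fc with the
        # dimensions swapped:
        #   self.c_proj = Linear(intermediate_size, embed_dim, bias=True)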

    def forward(self, hidden_states: Tensor) -> Tensor:
        """Apply feed-forward network.

        Args:
            hidden_states: Input hidden states.

        Returns:
            MLP output.
        """
        # 4: Apply the first linear transformation
        # TODO: Apply self.c_fc to hidden_states
        # Hint: This expands the hidden dimension to the intermediate size
        hidden_states = None
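
        # A sketch of one possible answer, assuming Linear layers are
        # invoked by calling them directly, as the TODO suggests:
        #   hidden_states = self.c_fc(hidden_states)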

        # 5: Apply GELU activation function
        # TODO: Use F.gelu() with hidden_states and approximate="tanh"
        # https://docs.modular.com/max/api/python/functional/#max.functional.gelu
        # Hint: GELU is the non-linear activation used in GPT-2's MLP
        hidden_states = None
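
        # A sketch of one possible answer; the function name and the
        # approximate="tanh" keyword come from the gelu docs linked above:
        #   hidden_states = F.gelu(hidden_states, approximate="tanh")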

        # 6: Apply the second linear transformation and return
        # TODO: Apply self.c_proj to hidden_states and return the result
        # Hint: This projects back to the embedding dimension
        return None
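
        # A sketch of one possible answer for the final step:
        #   return self.c_proj(hidden_states)


# A hypothetical usage sketch once the TODOs are filled in (GPT-2 uses an
# intermediate size of 4 * n_embd; exact call semantics depend on max.nn.Module):
#   mlp = GPT2MLP(4 * config.n_embd, config)
#   output = mlp(hidden_states)  # same shape as the input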