Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions steps/step_02.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@

# 1: Import the required modules from MAX
# TODO: Import functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# https://docs.modular.com/max/api/python/functional

# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor

# TODO: Import Linear and Module from max.nn
# https://docs.modular.com/max/api/python/nn/module_v3
# https://docs.modular.com/max/api/python/nn/module

from max.tensor import Tensor
from step_01 import GPT2Config
Expand All @@ -47,13 +47,13 @@ def __init__(self, intermediate_size: int, config: GPT2Config) -> None:

# 2: Create the first linear layer (embedding to intermediate)
# TODO: Create self.c_fc as a Linear layer from embed_dim to intermediate_size with bias=True
# https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
# https://docs.modular.com/max/api/python/nn/Linear
# Hint: This is the expansion layer in the MLP
self.c_fc = None

# 3: Create the second linear layer (intermediate back to embedding)
# TODO: Create self.c_proj as a Linear layer from intermediate_size to embed_dim with bias=True
# https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
# https://docs.modular.com/max/api/python/nn/Linear
# Hint: This is the projection layer that brings us back to the embedding dimension
self.c_proj = None

Expand All @@ -73,7 +73,7 @@ def forward(self, hidden_states: Tensor) -> Tensor:

# 5: Apply GELU activation function
# TODO: Use F.gelu() with hidden_states and approximate="tanh"
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.gelu
# https://docs.modular.com/max/api/python/functional#max.functional.gelu
# Hint: GELU is the non-linear activation used in GPT-2's MLP
hidden_states = None

Expand Down
8 changes: 4 additions & 4 deletions steps/step_03.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
from max.dtype import DType

# TODO: Import necessary functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# https://docs.modular.com/max/api/python/functional
# TODO: Import Tensor object from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor
from max.graph import Dim, DimLike
from max.tensor import Tensor

Expand Down Expand Up @@ -62,12 +62,12 @@ def causal_mask(

# 4: Broadcast the mask to the correct shape
# TODO: Use F.broadcast_to() to expand mask to shape (sequence_length, n)
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.broadcast_to
# https://docs.modular.com/max/api/python/functional#max.functional.broadcast_to
# Hint: This creates a 2D attention mask matrix
mask = None

# 5: Apply band_part to create the causal (lower triangular) structure and return the mask
# TODO: Use F.band_part() with num_lower=None, num_upper=0, exclude=True
# https://docs.modular.com/max/api/python/nn/functional/#max.nn.functional.band_part
# https://docs.modular.com/max/api/python/functional#max.functional.band_part
# Hint: This keeps only the lower triangle, allowing attention to past tokens only
return None
6 changes: 3 additions & 3 deletions steps/step_05.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@

# 1: Import the required modules from MAX
# TODO: Import functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# https://docs.modular.com/max/api/python/functional

# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor

from max.graph import DimLike
from max.nn import Module
Expand Down Expand Up @@ -62,6 +62,6 @@ def forward(self, x: Tensor) -> Tensor:
"""
# 3: Apply layer normalization and return the result
# TODO: Use F.layer_norm() with x, gamma=self.weight, beta=self.bias, epsilon=self.eps
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.layer_norm
# https://docs.modular.com/max/api/python/functional#max.functional.layer_norm
# Hint: Layer normalization normalizes across the last dimension
return None