Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions steps/step_02.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
"""

# 1: Import the required modules from MAX
# TODO: Import functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# TODO: Import functional module max.functional with the alias F
# https://docs.modular.com/max/api/python/functional/

# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor/

# TODO: Import Linear and Module from max.nn
# https://docs.modular.com/max/api/python/nn/module_v3
# https://docs.modular.com/max/api/python/nn/

from max.tensor import Tensor
from step_01 import GPT2Config
Expand All @@ -47,13 +47,13 @@ def __init__(self, intermediate_size: int, config: GPT2Config) -> None:

# 2: Create the first linear layer (embedding to intermediate)
# TODO: Create self.c_fc as a Linear layer from embed_dim to intermediate_size with bias=True
# https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
# https://docs.modular.com/max/api/python/nn/Linear
# Hint: This is the expansion layer in the MLP
self.c_fc = None

# 3: Create the second linear layer (intermediate back to embedding)
# TODO: Create self.c_proj as a Linear layer from intermediate_size to embed_dim with bias=True
# https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
# https://docs.modular.com/max/api/python/nn/Linear
# Hint: This is the projection layer that brings us back to the embedding dimension
self.c_proj = None

Expand All @@ -73,7 +73,7 @@ def forward(self, hidden_states: Tensor) -> Tensor:

# 5: Apply GELU activation function
# TODO: Use F.gelu() with hidden_states and approximate="tanh"
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.gelu
# https://docs.modular.com/max/api/python/functional/#max.functional.gelu
# Hint: GELU is the non-linear activation used in GPT-2's MLP
hidden_states = None

Expand Down
14 changes: 7 additions & 7 deletions steps/step_03.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
from max.driver import Device
from max.dtype import DType

# TODO: Import necessary functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# TODO: Import Tensor object from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# TODO: Import functional module max.functional with the alias F
# https://docs.modular.com/max/api/python/functional/
# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor/
from max.graph import Dim, DimLike
from max.tensor import Tensor

Expand Down Expand Up @@ -56,18 +56,18 @@ def causal_mask(

# 3: Create a constant tensor filled with negative infinity
# TODO: Use Tensor.constant() with float("-inf"), dtype, and device parameters
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.constant
# https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.constant
# Hint: This creates the base mask value that will block attention to future tokens
mask = None

# 4: Broadcast the mask to the correct shape
# TODO: Use F.broadcast_to() to expand mask to shape (sequence_length, n)
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.broadcast_to
# https://docs.modular.com/max/api/python/functional/#max.functional.broadcast_to
# Hint: This creates a 2D attention mask matrix
mask = None

# 5: Apply band_part to create the causal (lower triangular) structure and return the mask
# TODO: Use F.band_part() with num_lower=None, num_upper=0, exclude=True
# https://docs.modular.com/max/api/python/nn/functional/#max.nn.functional.band_part
# https://docs.modular.com/max/api/python/functional/#max.functional.band_part
# Hint: This keeps only the lower triangle, allowing attention to past tokens only
return None
12 changes: 6 additions & 6 deletions steps/step_05.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
"""

# 1: Import the required modules from MAX
# TODO: Import functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# TODO: Import functional module max.functional with the alias F
# https://docs.modular.com/max/api/python/functional/

# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor/

from max.graph import DimLike
from max.nn import Module
Expand All @@ -42,12 +42,12 @@ def __init__(self, dim: DimLike, *, eps: float = 1e-5) -> None:

# 2: Initialize learnable weight and bias parameters
# TODO: Create self.weight as a Tensor of ones with shape [dim]
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.ones
# https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.ones
# Hint: This is the gamma parameter in layer normalization
self.weight = None

# TODO: Create self.bias as a Tensor of zeros with shape [dim]
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.zeros
# https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.zeros
# Hint: This is the beta parameter in layer normalization
self.bias = None

Expand All @@ -62,6 +62,6 @@ def forward(self, x: Tensor) -> Tensor:
"""
# 3: Apply layer normalization and return the result
# TODO: Use F.layer_norm() with x, gamma=self.weight, beta=self.bias, epsilon=self.eps
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.layer_norm
# https://docs.modular.com/max/api/python/functional/#max.functional.layer_norm
# Hint: Layer normalization normalizes across the last dimension
return None