diff --git a/steps/step_02.py b/steps/step_02.py index c3dfd46..a26d000 100644 --- a/steps/step_02.py +++ b/steps/step_02.py @@ -20,14 +20,14 @@ """ # 1: Import the required modules from MAX -# TODO: Import functional module from max.nn with the alias F -# https://docs.modular.com/max/api/python/nn/functional +# TODO: Import the max.functional module with the alias F +# https://docs.modular.com/max/api/python/functional/ # TODO: Import Tensor from max.tensor -# https://docs.modular.com/max/api/python/tensor.Tensor +# https://docs.modular.com/max/api/python/tensor/ # TODO: Import Linear and Module from max.nn -# https://docs.modular.com/max/api/python/nn/module_v3 +# https://docs.modular.com/max/api/python/nn/ from max.tensor import Tensor from step_01 import GPT2Config @@ -47,13 +47,13 @@ def __init__(self, intermediate_size: int, config: GPT2Config) -> None: # 2: Create the first linear layer (embedding to intermediate) # TODO: Create self.c_fc as a Linear layer from embed_dim to intermediate_size with bias=True - # https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear + # https://docs.modular.com/max/api/python/nn/Linear # Hint: This is the expansion layer in the MLP self.c_fc = None # 3: Create the second linear layer (intermediate back to embedding) # TODO: Create self.c_proj as a Linear layer from intermediate_size to embed_dim with bias=True - # https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear + # https://docs.modular.com/max/api/python/nn/Linear # Hint: This is the projection layer that brings us back to the embedding dimension self.c_proj = None @@ -73,7 +73,7 @@ def forward(self, hidden_states: Tensor) -> Tensor: # 5: Apply GELU activation function # TODO: Use F.gelu() with hidden_states and approximate="tanh" - # https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.gelu + # https://docs.modular.com/max/api/python/functional/#max.functional.gelu # Hint: GELU is the non-linear activation used in 
GPT-2's MLP hidden_states = None diff --git a/steps/step_03.py b/steps/step_03.py index 033e343..321ccf1 100644 --- a/steps/step_03.py +++ b/steps/step_03.py @@ -22,10 +22,10 @@ from max.driver import Device from max.dtype import DType -# TODO: Import necessary funcional module from max.nn with the alias F -# https://docs.modular.com/max/api/python/nn/functional -# TODO: Import Tensor object from max.tensor -# https://docs.modular.com/max/api/python/tensor.Tensor +# TODO: Import the max.functional module with the alias F +# https://docs.modular.com/max/api/python/functional/ +# TODO: Import Tensor from max.tensor +# https://docs.modular.com/max/api/python/tensor/ from max.graph import Dim, DimLike from max.tensor import Tensor @@ -56,18 +56,18 @@ def causal_mask( # 3: Create a constant tensor filled with negative infinity # TODO: Use Tensor.constant() with float("-inf"), dtype, and device parameters - # https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.constant + # https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.constant # Hint: This creates the base mask value that will block attention to future tokens mask = None # 4: Broadcast the mask to the correct shape # TODO: Use F.broadcast_to() to expand mask to shape (sequence_length, n) - # https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.broadcast_to + # https://docs.modular.com/max/api/python/functional/#max.functional.broadcast_to # Hint: This creates a 2D attention mask matrix mask = None # 5: Apply band_part to create the causal (lower triangular) structure and return the mask # TODO: Use F.band_part() with num_lower=None, num_upper=0, exclude=True - # https://docs.modular.com/max/api/python/nn/functional/#max.nn.functional.band_part + # https://docs.modular.com/max/api/python/functional/#max.functional.band_part # Hint: This keeps only the lower triangle, allowing attention to past tokens only return None diff --git a/steps/step_05.py 
b/steps/step_05.py index 4ae9ca7..7b86db1 100644 --- a/steps/step_05.py +++ b/steps/step_05.py @@ -17,11 +17,11 @@ """ # 1: Import the required modules from MAX -# TODO: Import functional module from max.nn with the alias F -# https://docs.modular.com/max/api/python/nn/functional +# TODO: Import the max.functional module with the alias F +# https://docs.modular.com/max/api/python/functional/ # TODO: Import Tensor from max.tensor -# https://docs.modular.com/max/api/python/tensor.Tensor +# https://docs.modular.com/max/api/python/tensor/ from max.graph import DimLike from max.nn import Module @@ -42,12 +42,12 @@ def __init__(self, dim: DimLike, *, eps: float = 1e-5) -> None: # 2: Initialize learnable weight and bias parameters # TODO: Create self.weight as a Tensor of ones with shape [dim] - # https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.ones + # https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.ones # Hint: This is the gamma parameter in layer normalization self.weight = None # TODO: Create self.bias as a Tensor of zeros with shape [dim] - # https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.zeros + # https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.zeros # Hint: This is the beta parameter in layer normalization self.bias = None @@ -62,6 +62,6 @@ def forward(self, x: Tensor) -> Tensor: """ # 3: Apply layer normalization and return the result # TODO: Use F.layer_norm() with x, gamma=self.weight, beta=self.bias, epsilon=self.eps - # https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.layer_norm + # https://docs.modular.com/max/api/python/functional/#max.functional.layer_norm # Hint: Layer normalization normalizes across the last dimension return None