Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions steps/step_02.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@

# 1: Import the required modules from MAX
# TODO: Import functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# https://docs.modular.com/max/api/python/functional

# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor

# TODO: Import Linear and Module from max.nn
# https://docs.modular.com/max/api/python/nn/module_v3
# https://docs.modular.com/max/api/python/nn/module

from max.tensor import Tensor
from step_01 import GPT2Config
Expand All @@ -47,13 +47,13 @@ def __init__(self, intermediate_size: int, config: GPT2Config) -> None:

# 2: Create the first linear layer (embedding to intermediate)
# TODO: Create self.c_fc as a Linear layer from embed_dim to intermediate_size with bias=True
# https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
# https://docs.modular.com/max/api/python/nn/Linear
# Hint: This is the expansion layer in the MLP
self.c_fc = None

# 3: Create the second linear layer (intermediate back to embedding)
# TODO: Create self.c_proj as a Linear layer from intermediate_size to embed_dim with bias=True
# https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
# https://docs.modular.com/max/api/python/nn/Linear
# Hint: This is the projection layer that brings us back to the embedding dimension
self.c_proj = None

Expand All @@ -73,7 +73,7 @@ def forward(self, hidden_states: Tensor) -> Tensor:

# 5: Apply GELU activation function
# TODO: Use F.gelu() with hidden_states and approximate="tanh"
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.gelu
# https://docs.modular.com/max/api/python/functional#max.functional.gelu
# Hint: GELU is the non-linear activation used in GPT-2's MLP
hidden_states = None

Expand Down
8 changes: 4 additions & 4 deletions steps/step_03.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
from max.dtype import DType

# TODO: Import necessary functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# https://docs.modular.com/max/api/python/functional
# TODO: Import Tensor object from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor
from max.graph import Dim, DimLike
from max.tensor import Tensor

Expand Down Expand Up @@ -62,12 +62,12 @@ def causal_mask(

# 4: Broadcast the mask to the correct shape
# TODO: Use F.broadcast_to() to expand mask to shape (sequence_length, n)
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.broadcast_to
# https://docs.modular.com/max/api/python/functional#max.functional.broadcast_to
# Hint: This creates a 2D attention mask matrix
mask = None

# 5: Apply band_part to create the causal (lower triangular) structure and return the mask
# TODO: Use F.band_part() with num_lower=None, num_upper=0, exclude=True
# https://docs.modular.com/max/api/python/nn/functional/#max.nn.functional.band_part
# https://docs.modular.com/max/api/python/functional#max.functional.band_part
# Hint: This keeps only the lower triangle, allowing attention to past tokens only
return None
6 changes: 3 additions & 3 deletions steps/step_05.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@

# 1: Import the required modules from MAX
# TODO: Import functional module from max.nn with the alias F
# https://docs.modular.com/max/api/python/nn/functional
# https://docs.modular.com/max/api/python/functional

# TODO: Import Tensor from max.tensor
# https://docs.modular.com/max/api/python/tensor.Tensor
# https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor

from max.graph import DimLike
from max.nn import Module
Expand Down Expand Up @@ -62,6 +62,6 @@ def forward(self, x: Tensor) -> Tensor:
"""
# 3: Apply layer normalization and return the result
# TODO: Use F.layer_norm() with x, gamma=self.weight, beta=self.bias, epsilon=self.eps
# https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.layer_norm
# https://docs.modular.com/max/api/python/functional#max.functional.layer_norm
# Hint: Layer normalization normalizes across the last dimension
return None