diff --git a/steps/step_02.py b/steps/step_02.py
index c3dfd46..a26d000 100644
--- a/steps/step_02.py
+++ b/steps/step_02.py
@@ -20,14 +20,14 @@
 """
 
 # 1: Import the required modules from MAX
-# TODO: Import functional module from max.nn with the alias F
-# https://docs.modular.com/max/api/python/nn/functional
+# TODO: Import the max.functional module with the alias F
+# https://docs.modular.com/max/api/python/functional/
 
 # TODO: Import Tensor from max.tensor
-# https://docs.modular.com/max/api/python/tensor.Tensor
+# https://docs.modular.com/max/api/python/tensor/
 
 # TODO: Import Linear and Module from max.nn
-# https://docs.modular.com/max/api/python/nn/module_v3
+# https://docs.modular.com/max/api/python/nn/
 
 from max.tensor import Tensor
 from step_01 import GPT2Config
@@ -47,13 +47,13 @@ def __init__(self, intermediate_size: int, config: GPT2Config) -> None:
 
         # 2: Create the first linear layer (embedding to intermediate)
         # TODO: Create self.c_fc as a Linear layer from embed_dim to intermediate_size with bias=True
-        # https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
+        # https://docs.modular.com/max/api/python/nn/Linear
         # Hint: This is the expansion layer in the MLP
         self.c_fc = None
 
         # 3: Create the second linear layer (intermediate back to embedding)
         # TODO: Create self.c_proj as a Linear layer from intermediate_size to embed_dim with bias=True
-        # https://docs.modular.com/max/api/python/nn/module_v3#max.nn.Linear
+        # https://docs.modular.com/max/api/python/nn/Linear
         # Hint: This is the projection layer that brings us back to the embedding dimension
         self.c_proj = None
 
@@ -73,7 +73,7 @@ def forward(self, hidden_states: Tensor) -> Tensor:
 
         # 5: Apply GELU activation function
         # TODO: Use F.gelu() with hidden_states and approximate="tanh"
-        # https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.gelu
+        # https://docs.modular.com/max/api/python/functional/#max.functional.gelu
         # Hint: GELU is the non-linear activation used in GPT-2's MLP
         hidden_states = None
 
diff --git a/steps/step_03.py b/steps/step_03.py
index 033e343..321ccf1 100644
--- a/steps/step_03.py
+++ b/steps/step_03.py
@@ -22,10 +22,10 @@
 from max.driver import Device
 from max.dtype import DType
 
-# TODO: Import necessary funcional module from max.nn with the alias F
-# https://docs.modular.com/max/api/python/nn/functional
-# TODO: Import Tensor object from max.tensor
-# https://docs.modular.com/max/api/python/tensor.Tensor
+# TODO: Import the max.functional module with the alias F
+# https://docs.modular.com/max/api/python/functional/
+# TODO: Import Tensor from max.tensor
+# https://docs.modular.com/max/api/python/tensor/
 from max.graph import Dim, DimLike
 from max.tensor import Tensor
 
@@ -56,18 +56,18 @@ def causal_mask(
 
     # 3: Create a constant tensor filled with negative infinity
     # TODO: Use Tensor.constant() with float("-inf"), dtype, and device parameters
-    # https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.constant
+    # https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.constant
     # Hint: This creates the base mask value that will block attention to future tokens
     mask = None
 
     # 4: Broadcast the mask to the correct shape
     # TODO: Use F.broadcast_to() to expand mask to shape (sequence_length, n)
-    # https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.broadcast_to
+    # https://docs.modular.com/max/api/python/functional/#max.functional.broadcast_to
     # Hint: This creates a 2D attention mask matrix
     mask = None
 
     # 5: Apply band_part to create the causal (lower triangular) structure and return the mask
     # TODO: Use F.band_part() with num_lower=None, num_upper=0, exclude=True
-    # https://docs.modular.com/max/api/python/nn/functional/#max.nn.functional.band_part
+    # https://docs.modular.com/max/api/python/functional/#max.functional.band_part
     # Hint: This keeps only the lower triangle, allowing attention to past tokens only
     return None
diff --git a/steps/step_05.py b/steps/step_05.py
index 4ae9ca7..7b86db1 100644
--- a/steps/step_05.py
+++ b/steps/step_05.py
@@ -17,11 +17,11 @@
 """
 
 # 1: Import the required modules from MAX
-# TODO: Import functional module from max.nn with the alias F
-# https://docs.modular.com/max/api/python/nn/functional
+# TODO: Import the max.functional module with the alias F
+# https://docs.modular.com/max/api/python/functional/
 
 # TODO: Import Tensor from max.tensor
-# https://docs.modular.com/max/api/python/tensor.Tensor
+# https://docs.modular.com/max/api/python/tensor/
 
 from max.graph import DimLike
 from max.nn import Module
@@ -42,12 +42,12 @@ def __init__(self, dim: DimLike, *, eps: float = 1e-5) -> None:
 
         # 2: Initialize learnable weight and bias parameters
         # TODO: Create self.weight as a Tensor of ones with shape [dim]
-        # https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.ones
+        # https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.ones
         # Hint: This is the gamma parameter in layer normalization
         self.weight = None
 
         # TODO: Create self.bias as a Tensor of zeros with shape [dim]
-        # https://docs.modular.com/max/api/python/tensor#max.tensor.Tensor.zeros
+        # https://docs.modular.com/max/api/python/tensor/#max.tensor.Tensor.zeros
         # Hint: This is the beta parameter in layer normalization
         self.bias = None
 
@@ -62,6 +62,6 @@ def forward(self, x: Tensor) -> Tensor:
         """
         # 3: Apply layer normalization and return the result
         # TODO: Use F.layer_norm() with x, gamma=self.weight, beta=self.bias, epsilon=self.eps
-        # https://docs.modular.com/max/api/python/nn/functional#max.nn.functional.layer_norm
+        # https://docs.modular.com/max/api/python/functional/#max.functional.layer_norm
         # Hint: Layer normalization normalizes across the last dimension
         return None