Move model.train() to the training loop

bzz · bzz · commit 96bb4f48f5ed · 2025-03-18T17:16:53.000+01:00
diff --git a/recipes/full_finetune_distributed.py b/recipes/full_finetune_distributed.py
@@ -811,7 +811,6 @@ def validate(self) -> float:
             else float("inf")
         )
 
-        self._model.train()
         return avg_val_loss
 
     def train(self) -> None:
@@ -848,6 +847,7 @@ def train(self) -> None:
                     and self._device.type == "cuda"
                 ):
                     torch.cuda.memory._record_memory_history()
+                self._model.train()
                 utils.batch_to_device(batch, self._device)
 
                 # Calculate the number of unmasked tokens in the current batch