diff --git a/recipes/dev/early_exit_finetune_distributed.py b/recipes/dev/early_exit_finetune_distributed.py
index 7d8808d90d..5492b0d2cd 100644
--- a/recipes/dev/early_exit_finetune_distributed.py
+++ b/recipes/dev/early_exit_finetune_distributed.py
@@ -367,6 +367,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -639,6 +640,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -664,7 +666,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/full_finetune_distributed.py b/recipes/full_finetune_distributed.py
index db4d1b59cc..a0f5e88717 100644
--- a/recipes/full_finetune_distributed.py
+++ b/recipes/full_finetune_distributed.py
@@ -331,6 +331,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -645,6 +646,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -670,7 +672,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/full_finetune_single_device.py b/recipes/full_finetune_single_device.py
index 0c53666dad..f842a45017 100644
--- a/recipes/full_finetune_single_device.py
+++ b/recipes/full_finetune_single_device.py
@@ -305,6 +305,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -546,6 +547,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -573,7 +575,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/knowledge_distillation_distributed.py b/recipes/knowledge_distillation_distributed.py
index 4e5165eb3b..5dfae086ad 100644
--- a/recipes/knowledge_distillation_distributed.py
+++ b/recipes/knowledge_distillation_distributed.py
@@ -292,6 +292,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -638,6 +639,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -662,7 +664,7 @@ def _setup_data(
             num_replicas=world_size,
             rank=rank,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/knowledge_distillation_single_device.py b/recipes/knowledge_distillation_single_device.py
index 1571ef1f44..c9cf058784 100644
--- a/recipes/knowledge_distillation_single_device.py
+++ b/recipes/knowledge_distillation_single_device.py
@@ -287,6 +287,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -519,6 +520,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -541,7 +543,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/lora_dpo_distributed.py b/recipes/lora_dpo_distributed.py
index d54adc2cf4..8888266ab5 100644
--- a/recipes/lora_dpo_distributed.py
+++ b/recipes/lora_dpo_distributed.py
@@ -296,6 +296,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -486,6 +487,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -504,7 +506,7 @@ def _setup_data(
         ds = config.instantiate(cfg_dataset, tokenizer=self._tokenizer)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )

         dataloader = DataLoader(
diff --git a/recipes/lora_dpo_single_device.py b/recipes/lora_dpo_single_device.py
index c493b65602..6dbed68c98 100644
--- a/recipes/lora_dpo_single_device.py
+++ b/recipes/lora_dpo_single_device.py
@@ -247,6 +247,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -369,6 +370,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -389,7 +391,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/lora_finetune_distributed.py b/recipes/lora_finetune_distributed.py
index d5304e496e..1be8ba71a9 100644
--- a/recipes/lora_finetune_distributed.py
+++ b/recipes/lora_finetune_distributed.py
@@ -312,6 +312,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -577,6 +578,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -602,7 +604,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )

         dataloader = DataLoader(
diff --git a/recipes/lora_finetune_single_device.py b/recipes/lora_finetune_single_device.py
index 5cf0a0f969..9c946657e6 100644
--- a/recipes/lora_finetune_single_device.py
+++ b/recipes/lora_finetune_single_device.py
@@ -308,6 +308,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -516,6 +517,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -543,7 +545,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/ppo_full_finetune_single_device.py b/recipes/ppo_full_finetune_single_device.py
index 71395527ee..efa6056726 100644
--- a/recipes/ppo_full_finetune_single_device.py
+++ b/recipes/ppo_full_finetune_single_device.py
@@ -225,6 +225,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         self._setup_training_parameters(cfg)
@@ -651,7 +652,7 @@ def _setup_optimizer(
         return optimizer

     def _setup_data(
-        self, cfg_dataset: DictConfig, shuffle: bool, batch_size: int
+        self, cfg_dataset: DictConfig, shuffle: bool, batch_size: int, seed: int
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here.
@@ -670,7 +671,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/qat_distributed.py b/recipes/qat_distributed.py
index f9ba25ca34..15e482e989 100644
--- a/recipes/qat_distributed.py
+++ b/recipes/qat_distributed.py
@@ -321,6 +321,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -592,6 +593,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -617,7 +619,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/qat_lora_finetune_distributed.py b/recipes/qat_lora_finetune_distributed.py
index 074113b216..66637a3d8c 100644
--- a/recipes/qat_lora_finetune_distributed.py
+++ b/recipes/qat_lora_finetune_distributed.py
@@ -335,6 +335,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -619,6 +620,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -644,7 +646,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )

         dataloader = DataLoader(
diff --git a/tests/recipes/test_full_finetune_distributed.py b/tests/recipes/test_full_finetune_distributed.py
index 4cdc42d96b..ecc2e69899 100644
--- a/tests/recipes/test_full_finetune_distributed.py
+++ b/tests/recipes/test_full_finetune_distributed.py
@@ -51,15 +51,15 @@ def _get_test_config_overrides(self, epochs: int = 2):

     def _fetch_expected_loss_values_multi_rank(self, model_type):
         loss_values_map = {
-            "llama2": [10.5209, 10.5217, 10.4945, 10.5136],
-            "llama3": [11.9839, 11.9684, 11.9596, 11.93656],
+            "llama2": [10.5320, 10.5581, 10.4741, 10.4980],
+            "llama3": [11.9265, 11.9249, 11.9737, 11.9757],
         }
         return loss_values_map[model_type]

     def _fetch_expected_loss_values_single_rank(self, model_type):
         loss_values_map = {
-            "llama2": [10.5051, 10.5572, 10.4780, 10.5678],
-            "llama3": [11.9742, 12.0049, 11.9382, 12.0464],
+            "llama2": [10.5509, 10.4980, 10.4821, 10.4682],
+            "llama3": [11.8887, 11.9787, 11.9533, 11.9979],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_full_finetune_single_device.py b/tests/recipes/test_full_finetune_single_device.py
index d15601a1b1..69e3d3973b 100644
--- a/tests/recipes/test_full_finetune_single_device.py
+++ b/tests/recipes/test_full_finetune_single_device.py
@@ -56,8 +56,8 @@ def _get_test_config_overrides(self):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama2": [10.5201, 10.5217, 10.4945, 10.5136],
-            "llama3": [11.9839, 11.9684, 11.9596, 11.9366],
+            "llama2": [10.5320, 10.5581, 10.4740, 10.4984],
+            "llama3": [11.9265, 11.9249, 11.9737, 11.9757],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_knowledge_distillation_distributed.py b/tests/recipes/test_knowledge_distillation_distributed.py
index 43c29400c3..8ed82689b8 100644
--- a/tests/recipes/test_knowledge_distillation_distributed.py
+++ b/tests/recipes/test_knowledge_distillation_distributed.py
@@ -55,7 +55,7 @@ def _get_test_config_overrides(self, epochs: int = 2):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama3": [11.8316, 11.7520, 11.7642, 11.7664],
+            "llama3": [11.7545, 11.7653, 11.7810, 11.7969],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_knowledge_distillation_single_device.py b/tests/recipes/test_knowledge_distillation_single_device.py
index 76a6b1479c..e7cee96003 100644
--- a/tests/recipes/test_knowledge_distillation_single_device.py
+++ b/tests/recipes/test_knowledge_distillation_single_device.py
@@ -56,7 +56,7 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama3": [11.7898, 11.7825, 11.7788, 11.7671],
+            "llama3": [11.7612, 11.7607, 11.7856, 11.7869],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_lora_finetune_distributed.py b/tests/recipes/test_lora_finetune_distributed.py
index ef2686aeba..6ea05c3aa0 100644
--- a/tests/recipes/test_lora_finetune_distributed.py
+++ b/tests/recipes/test_lora_finetune_distributed.py
@@ -54,8 +54,8 @@ def _fetch_expected_loss_values(self, model_type):
         # These values have been validated against single device recipe test via
         # https://gist.github.com/ebsmothers/f1c3db7c66655a23a91e0290360960c4
         loss_values_map = {
-            "llama2": [10.5209, 10.5269, 10.5130, 10.5242],
-            "llama3": [11.9839, 11.9691, 11.9617, 11.9383],
+            "llama2": [10.5320, 10.5608, 10.4895, 10.5068],
+            "llama3": [11.9265, 11.9255, 11.9754, 11.9780],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_lora_finetune_single_device.py b/tests/recipes/test_lora_finetune_single_device.py
index 39b1fa3b6a..a760de0d47 100644
--- a/tests/recipes/test_lora_finetune_single_device.py
+++ b/tests/recipes/test_lora_finetune_single_device.py
@@ -52,15 +52,15 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama2": [10.5209, 10.5269, 10.5130, 10.5242],
-            "llama3": [11.9838, 11.9691, 11.9616, 11.9383],
+            "llama2": [10.5320, 10.5608, 10.4895, 10.5068],
+            "llama3": [11.9265, 11.9255, 11.9754, 11.9780],
         }
         return loss_values_map[model_type]

     def _fetch_qlora_expected_loss_values(self, dtype):
         if dtype == "bf16":
-            return [10.5197, 10.5272, 10.5129, 10.5243]
-        return [10.5198, 10.5271, 10.5131, 10.5244]
+            return [10.5313, 10.5575, 10.4884, 10.5073]
+        return [10.5313, 10.5576, 10.4885, 10.5071]

     @pytest.mark.integration_test
     @pytest.mark.parametrize(
diff --git a/tests/recipes/test_ppo_full_finetune_single_device.py b/tests/recipes/test_ppo_full_finetune_single_device.py
index 412c4c06dd..acb67b016b 100644
--- a/tests/recipes/test_ppo_full_finetune_single_device.py
+++ b/tests/recipes/test_ppo_full_finetune_single_device.py
@@ -123,18 +123,18 @@ def test_loss(self, tmpdir, monkeypatch):

         loss_values = get_loss_values_from_metric_logger(log_file)
         expected_loss_values = [
-            1.0403,
-            0.9495,
-            0.9084,
-            1.0494,
-            0.9609,
-            0.8846,
-            1.0282,
-            0.9390,
-            0.8915,
-            1.0166,
-            0.9231,
-            0.9352,
+            1.0522,
+            0.9608,
+            0.9141,
+            1.0410,
+            0.9544,
+            0.8663,
+            1.0258,
+            0.9375,
+            0.8831,
+            1.0182,
+            0.9241,
+            0.9411,
         ]
         torch.testing.assert_close(
             loss_values, expected_loss_values, atol=1e-4, rtol=1e-5
diff --git a/tests/recipes/test_qat_distributed.py b/tests/recipes/test_qat_distributed.py
index 34dd190125..9f2193a13f 100644
--- a/tests/recipes/test_qat_distributed.py
+++ b/tests/recipes/test_qat_distributed.py
@@ -45,8 +45,8 @@ def _get_test_config_overrides(self):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama2": [10.5211, 10.5217, 10.4944, 10.5134],
-            "llama3": [11.9836, 11.9683, 11.9594, 11.9366],
+            "llama2": [10.5337, 10.5563, 10.4786, 10.5002],
+            "llama3": [11.9270, 11.9240, 11.9731, 11.9751],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_qat_lora_finetune_distributed.py b/tests/recipes/test_qat_lora_finetune_distributed.py
index 6c43adcc73..be49c0c80f 100644
--- a/tests/recipes/test_qat_lora_finetune_distributed.py
+++ b/tests/recipes/test_qat_lora_finetune_distributed.py
@@ -53,7 +53,7 @@ def _get_test_config_overrides(self):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama3": [11.9835, 11.9694, 11.9615, 11.9383],
+            "llama3": [11.9259, 11.9250, 11.9753, 11.9774],
         }
         return loss_values_map[model_type]

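
Below is a minimal, self-contained sketch of the seeding pattern this patch applies across the recipes: the recipe-level seed is read from the config with `cfg.get("seed") or 0` (falling back to 0 when the key is absent or None) and threaded through to `DistributedSampler` in place of the previously hard-coded `seed=0`. The helper name `build_sampler_and_loader` is hypothetical and is not part of the recipes; it only illustrates the data flow under those assumptions.

from typing import Optional, Tuple

from torch.utils.data import DataLoader, Dataset, DistributedSampler


def build_sampler_and_loader(
    ds: Dataset,
    batch_size: int,
    shuffle: bool,
    seed: Optional[int],
    world_size: int = 1,
    rank: int = 0,
) -> Tuple[DistributedSampler, DataLoader]:
    # Mirrors `cfg.get("seed") or 0`: an unset (None) seed falls back to 0,
    # so shuffling order follows the recipe seed rather than a hard-coded 0.
    effective_seed = seed or 0
    sampler = DistributedSampler(
        ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=effective_seed
    )
    dataloader = DataLoader(dataset=ds, batch_size=batch_size, sampler=sampler)
    return sampler, dataloader

Note that with the `or 0` idiom an explicitly configured `seed: 0` and an unset seed are indistinguishable, and the per-epoch reshuffle of `DistributedSampler` still requires `sampler.set_epoch(epoch)` each epoch regardless of the seed value.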