diff --git a/recipes/dev/early_exit_finetune_distributed.py b/recipes/dev/early_exit_finetune_distributed.py
index 7d8808d90d..5492b0d2cd 100644
--- a/recipes/dev/early_exit_finetune_distributed.py
+++ b/recipes/dev/early_exit_finetune_distributed.py
@@ -367,6 +367,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -639,6 +640,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -664,7 +666,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/full_finetune_distributed.py b/recipes/full_finetune_distributed.py
index db4d1b59cc..a0f5e88717 100644
--- a/recipes/full_finetune_distributed.py
+++ b/recipes/full_finetune_distributed.py
@@ -331,6 +331,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -645,6 +646,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -670,7 +672,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/full_finetune_single_device.py b/recipes/full_finetune_single_device.py
index 0c53666dad..f842a45017 100644
--- a/recipes/full_finetune_single_device.py
+++ b/recipes/full_finetune_single_device.py
@@ -305,6 +305,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -546,6 +547,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -573,7 +575,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/knowledge_distillation_distributed.py b/recipes/knowledge_distillation_distributed.py
index 4e5165eb3b..5dfae086ad 100644
--- a/recipes/knowledge_distillation_distributed.py
+++ b/recipes/knowledge_distillation_distributed.py
@@ -292,6 +292,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -638,6 +639,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -662,7 +664,7 @@ def _setup_data(
             num_replicas=world_size,
             rank=rank,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/knowledge_distillation_single_device.py b/recipes/knowledge_distillation_single_device.py
index 1571ef1f44..c9cf058784 100644
--- a/recipes/knowledge_distillation_single_device.py
+++ b/recipes/knowledge_distillation_single_device.py
@@ -287,6 +287,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -519,6 +520,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -541,7 +543,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/lora_dpo_distributed.py b/recipes/lora_dpo_distributed.py
index d54adc2cf4..8888266ab5 100644
--- a/recipes/lora_dpo_distributed.py
+++ b/recipes/lora_dpo_distributed.py
@@ -296,6 +296,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -486,6 +487,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -504,7 +506,7 @@ def _setup_data(
         ds = config.instantiate(cfg_dataset, tokenizer=self._tokenizer)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )

         dataloader = DataLoader(
diff --git a/recipes/lora_dpo_single_device.py b/recipes/lora_dpo_single_device.py
index c493b65602..6dbed68c98 100644
--- a/recipes/lora_dpo_single_device.py
+++ b/recipes/lora_dpo_single_device.py
@@ -247,6 +247,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -369,6 +370,7 @@ def _setup_data(
         cfg_dataset: DictConfig,
         shuffle: bool,
         batch_size: int,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -389,7 +391,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/lora_finetune_distributed.py b/recipes/lora_finetune_distributed.py
index d5304e496e..1be8ba71a9 100644
--- a/recipes/lora_finetune_distributed.py
+++ b/recipes/lora_finetune_distributed.py
@@ -312,6 +312,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -577,6 +578,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -602,7 +604,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )

         dataloader = DataLoader(
diff --git a/recipes/lora_finetune_single_device.py b/recipes/lora_finetune_single_device.py
index 5cf0a0f969..9c946657e6 100644
--- a/recipes/lora_finetune_single_device.py
+++ b/recipes/lora_finetune_single_device.py
@@ -308,6 +308,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -516,6 +517,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports
@@ -543,7 +545,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/ppo_full_finetune_single_device.py b/recipes/ppo_full_finetune_single_device.py
index 71395527ee..efa6056726 100644
--- a/recipes/ppo_full_finetune_single_device.py
+++ b/recipes/ppo_full_finetune_single_device.py
@@ -225,6 +225,7 @@ def setup(self, cfg: DictConfig) -> None:
             cfg_dataset=cfg.dataset,
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
+            seed=cfg.get("seed") or 0,
         )

         self._setup_training_parameters(cfg)
@@ -651,7 +652,7 @@ def _setup_optimizer(
         return optimizer

     def _setup_data(
-        self, cfg_dataset: DictConfig, shuffle: bool, batch_size: int
+        self, cfg_dataset: DictConfig, shuffle: bool, batch_size: int, seed: int
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here.
@@ -670,7 +671,7 @@ def _setup_data(
             num_replicas=1,
             rank=0,
             shuffle=shuffle,
-            seed=0,
+            seed=seed,
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/qat_distributed.py b/recipes/qat_distributed.py
index f9ba25ca34..15e482e989 100644
--- a/recipes/qat_distributed.py
+++ b/recipes/qat_distributed.py
@@ -321,6 +321,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -592,6 +593,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -617,7 +619,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )
         dataloader = DataLoader(
             dataset=ds,
diff --git a/recipes/qat_lora_finetune_distributed.py b/recipes/qat_lora_finetune_distributed.py
index 074113b216..66637a3d8c 100644
--- a/recipes/qat_lora_finetune_distributed.py
+++ b/recipes/qat_lora_finetune_distributed.py
@@ -335,6 +335,7 @@ def setup(self, cfg: DictConfig) -> None:
             shuffle=cfg.shuffle,
             batch_size=cfg.batch_size,
             collate_fn=collate_name,
+            seed=cfg.get("seed") or 0,
         )

         # Finally update the recipe state which can only be correctly set after all of the
@@ -619,6 +620,7 @@ def _setup_data(
         shuffle: bool,
         batch_size: int,
         collate_fn: str,
+        seed: int,
     ) -> Tuple[DistributedSampler, DataLoader]:
         """
         All data related setup happens here. Currently this recipe only supports the
@@ -644,7 +646,7 @@ def _setup_data(
         collate_fn = _get_component_from_path(collate_fn)

         sampler = DistributedSampler(
-            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=0
+            ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=seed
         )

         dataloader = DataLoader(
diff --git a/tests/recipes/test_full_finetune_distributed.py b/tests/recipes/test_full_finetune_distributed.py
index 4cdc42d96b..ecc2e69899 100644
--- a/tests/recipes/test_full_finetune_distributed.py
+++ b/tests/recipes/test_full_finetune_distributed.py
@@ -51,15 +51,15 @@ def _get_test_config_overrides(self, epochs: int = 2):

     def _fetch_expected_loss_values_multi_rank(self, model_type):
         loss_values_map = {
-            "llama2": [10.5209, 10.5217, 10.4945, 10.5136],
-            "llama3": [11.9839, 11.9684, 11.9596, 11.93656],
+            "llama2": [10.5320, 10.5581, 10.4741, 10.4980],
+            "llama3": [11.9265, 11.9249, 11.9737, 11.9757],
         }
         return loss_values_map[model_type]

     def _fetch_expected_loss_values_single_rank(self, model_type):
         loss_values_map = {
-            "llama2": [10.5051, 10.5572, 10.4780, 10.5678],
-            "llama3": [11.9742, 12.0049, 11.9382, 12.0464],
+            "llama2": [10.5509, 10.4980, 10.4821, 10.4682],
+            "llama3": [11.8887, 11.9787, 11.9533, 11.9979],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_full_finetune_single_device.py b/tests/recipes/test_full_finetune_single_device.py
index d15601a1b1..69e3d3973b 100644
--- a/tests/recipes/test_full_finetune_single_device.py
+++ b/tests/recipes/test_full_finetune_single_device.py
@@ -56,8 +56,8 @@ def _get_test_config_overrides(self):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama2": [10.5201, 10.5217, 10.4945, 10.5136],
-            "llama3": [11.9839, 11.9684, 11.9596, 11.9366],
+            "llama2": [10.5320, 10.5581, 10.4740, 10.4984],
+            "llama3": [11.9265, 11.9249, 11.9737, 11.9757],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_knowledge_distillation_distributed.py b/tests/recipes/test_knowledge_distillation_distributed.py
index 43c29400c3..8ed82689b8 100644
--- a/tests/recipes/test_knowledge_distillation_distributed.py
+++ b/tests/recipes/test_knowledge_distillation_distributed.py
@@ -55,7 +55,7 @@ def _get_test_config_overrides(self, epochs: int = 2):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama3": [11.8316, 11.7520, 11.7642, 11.7664],
+            "llama3": [11.7545, 11.7653, 11.7810, 11.7969],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_knowledge_distillation_single_device.py b/tests/recipes/test_knowledge_distillation_single_device.py
index 76a6b1479c..e7cee96003 100644
--- a/tests/recipes/test_knowledge_distillation_single_device.py
+++ b/tests/recipes/test_knowledge_distillation_single_device.py
@@ -56,7 +56,7 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama3": [11.7898, 11.7825, 11.7788, 11.7671],
+            "llama3": [11.7612, 11.7607, 11.7856, 11.7869],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_lora_finetune_distributed.py b/tests/recipes/test_lora_finetune_distributed.py
index ef2686aeba..6ea05c3aa0 100644
--- a/tests/recipes/test_lora_finetune_distributed.py
+++ b/tests/recipes/test_lora_finetune_distributed.py
@@ -54,8 +54,8 @@ def _fetch_expected_loss_values(self, model_type):
         # These values have been validated against single device recipe test via
         # https://gist.github.com/ebsmothers/f1c3db7c66655a23a91e0290360960c4
         loss_values_map = {
-            "llama2": [10.5209, 10.5269, 10.5130, 10.5242],
-            "llama3": [11.9839, 11.9691, 11.9617, 11.9383],
+            "llama2": [10.5320, 10.5608, 10.4895, 10.5068],
+            "llama3": [11.9265, 11.9255, 11.9754, 11.9780],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_lora_finetune_single_device.py b/tests/recipes/test_lora_finetune_single_device.py
index 39b1fa3b6a..a760de0d47 100644
--- a/tests/recipes/test_lora_finetune_single_device.py
+++ b/tests/recipes/test_lora_finetune_single_device.py
@@ -52,15 +52,15 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama2": [10.5209, 10.5269, 10.5130, 10.5242],
-            "llama3": [11.9838, 11.9691, 11.9616, 11.9383],
+            "llama2": [10.5320, 10.5608, 10.4895, 10.5068],
+            "llama3": [11.9265, 11.9255, 11.9754, 11.9780],
         }
         return loss_values_map[model_type]

     def _fetch_qlora_expected_loss_values(self, dtype):
         if dtype == "bf16":
-            return [10.5197, 10.5272, 10.5129, 10.5243]
-        return [10.5198, 10.5271, 10.5131, 10.5244]
+            return [10.5313, 10.5575, 10.4884, 10.5073]
+        return [10.5313, 10.5576, 10.4885, 10.5071]

     @pytest.mark.integration_test
     @pytest.mark.parametrize(
diff --git a/tests/recipes/test_ppo_full_finetune_single_device.py b/tests/recipes/test_ppo_full_finetune_single_device.py
index 412c4c06dd..acb67b016b 100644
--- a/tests/recipes/test_ppo_full_finetune_single_device.py
+++ b/tests/recipes/test_ppo_full_finetune_single_device.py
@@ -123,18 +123,18 @@ def test_loss(self, tmpdir, monkeypatch):

         loss_values = get_loss_values_from_metric_logger(log_file)
         expected_loss_values = [
-            1.0403,
-            0.9495,
-            0.9084,
-            1.0494,
-            0.9609,
-            0.8846,
-            1.0282,
-            0.9390,
-            0.8915,
-            1.0166,
-            0.9231,
-            0.9352,
+            1.0522,
+            0.9608,
+            0.9141,
+            1.0410,
+            0.9544,
+            0.8663,
+            1.0258,
+            0.9375,
+            0.8831,
+            1.0182,
+            0.9241,
+            0.9411,
         ]
         torch.testing.assert_close(
             loss_values, expected_loss_values, atol=1e-4, rtol=1e-5
diff --git a/tests/recipes/test_qat_distributed.py b/tests/recipes/test_qat_distributed.py
index 34dd190125..9f2193a13f 100644
--- a/tests/recipes/test_qat_distributed.py
+++ b/tests/recipes/test_qat_distributed.py
@@ -45,8 +45,8 @@ def _get_test_config_overrides(self):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama2": [10.5211, 10.5217, 10.4944, 10.5134],
-            "llama3": [11.9836, 11.9683, 11.9594, 11.9366],
+            "llama2": [10.5337, 10.5563, 10.4786, 10.5002],
+            "llama3": [11.9270, 11.9240, 11.9731, 11.9751],
         }
         return loss_values_map[model_type]

diff --git a/tests/recipes/test_qat_lora_finetune_distributed.py b/tests/recipes/test_qat_lora_finetune_distributed.py
index 6c43adcc73..be49c0c80f 100644
--- a/tests/recipes/test_qat_lora_finetune_distributed.py
+++ b/tests/recipes/test_qat_lora_finetune_distributed.py
@@ -53,7 +53,7 @@ def _get_test_config_overrides(self):

     def _fetch_expected_loss_values(self, model_type):
         loss_values_map = {
-            "llama3": [11.9835, 11.9694, 11.9615, 11.9383],
+            "llama3": [11.9259, 11.9250, 11.9753, 11.9774],
         }
         return loss_values_map[model_type]

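
Below is a minimal, self-contained sketch of the seeding pattern this patch applies across the recipes: the recipe-level seed is read from the config with `cfg.get("seed") or 0` (falling back to 0 when the key is absent or None) and threaded through to `DistributedSampler` in place of the previously hard-coded `seed=0`. The helper name `build_sampler_and_loader` is hypothetical and is not part of the recipes; it only illustrates the data flow under those assumptions.

from typing import Optional, Tuple

from torch.utils.data import DataLoader, Dataset, DistributedSampler


def build_sampler_and_loader(
    ds: Dataset,
    batch_size: int,
    shuffle: bool,
    seed: Optional[int],
    world_size: int = 1,
    rank: int = 0,
) -> Tuple[DistributedSampler, DataLoader]:
    # Mirrors `cfg.get("seed") or 0`: an unset (None) seed falls back to 0,
    # so shuffling order follows the recipe seed rather than a hard-coded 0.
    effective_seed = seed or 0
    sampler = DistributedSampler(
        ds, num_replicas=world_size, rank=rank, shuffle=shuffle, seed=effective_seed
    )
    dataloader = DataLoader(dataset=ds, batch_size=batch_size, sampler=sampler)
    return sampler, dataloader

Note that with the `or 0` idiom an explicitly configured `seed: 0` and an unset seed are indistinguishable, and the per-epoch reshuffle of `DistributedSampler` still requires `sampler.set_epoch(epoch)` each epoch regardless of the seed value.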