Skip to content

Commit 3bffed5

Browse files
Neuron integration (#3935)
* feat: add `is_neuron_available` utility function * feat: add `DistributedType.MULTI_NEURON` * feat: adapt state classes for Neuron * feat: add support for Neuron in Accelerator * feat: adapted local sgd to fail with Neuron cores * feat: add proper constant for Neuron * feat: adapt dataclasses for Neuron * feat: adapt environment for Neuron * feat: adapt mixed precision utils for Neuron * feat: integrate randomness features for Neuron * feat: save and load Neuron RNG state * feat: add clear cache method for Neuron * feat: add support for Neuron device with launch * feat: add support for Neuron devices in commands * feat: adapt big modeling for Neuron * feat: adapt test utils for Neuron * feat: add "neuron" handling in set_device
1 parent ae36c74 commit 3bffed5

File tree

22 files changed

+136
-3
lines changed

22 files changed

+136
-3
lines changed

src/accelerate/accelerator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,7 @@ def multi_device(self):
664664
DistributedType.MULTI_NPU,
665665
DistributedType.MULTI_XPU,
666666
DistributedType.MULTI_HPU,
667+
DistributedType.MULTI_NEURON,
667668
)
668669

669670
@property

src/accelerate/big_modeling.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
is_bnb_available,
4343
is_mlu_available,
4444
is_musa_available,
45+
is_neuron_available,
4546
is_npu_available,
4647
is_sdaa_available,
4748
is_xpu_available,
@@ -477,6 +478,8 @@ def wrapper(*args, **kwargs):
477478
model.musa = add_warning(model.musa, model)
478479
elif is_xpu_available():
479480
model.xpu = add_warning(model.xpu, model)
481+
elif is_neuron_available():
482+
model.neuron = add_warning(model.neuron, model)
480483
else:
481484
model.cuda = add_warning(model.cuda, model)
482485

@@ -499,6 +502,8 @@ def wrapper(*args, **kwargs):
499502
device = f"sdaa:{device}"
500503
elif is_musa_available() and isinstance(device, int):
501504
device = f"musa:{device}"
505+
elif is_neuron_available() and isinstance(device, int):
506+
device = f"neuron:{device}"
502507
if device != "disk":
503508
model.to(device)
504509
else:

src/accelerate/checkpointing.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
is_hpu_available,
3636
is_mlu_available,
3737
is_musa_available,
38+
is_neuron_available,
3839
is_sdaa_available,
3940
is_torch_version,
4041
is_torch_xla_available,
@@ -167,6 +168,8 @@ def save_accelerator_state(
167168
states["torch_musa_manual_seed"] = torch.musa.get_rng_state_all()
168169
if is_hpu_available():
169170
states["torch_hpu_manual_seed"] = torch.hpu.get_rng_state_all()
171+
if is_neuron_available():
172+
states["torch_neuron_manual_seed"] = torch.neuron.get_rng_state_all()
170173
if is_cuda_available():
171174
states["torch_cuda_manual_seed"] = torch.cuda.get_rng_state_all()
172175
if is_torch_xla_available():
@@ -302,6 +305,8 @@ def load_accelerator_state(
302305
torch.musa.set_rng_state_all(states["torch_musa_manual_seed"])
303306
elif is_hpu_available():
304307
torch.hpu.set_rng_state_all(states["torch_hpu_manual_seed"])
308+
elif is_neuron_available():
309+
torch.neuron.set_rng_state_all(states["torch_neuron_manual_seed"])
305310
else:
306311
torch.cuda.set_rng_state_all(states["torch_cuda_manual_seed"])
307312
if is_torch_xla_available():

src/accelerate/commands/config/cluster.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
is_mps_available,
2727
is_msamp_available,
2828
is_musa_available,
29+
is_neuron_available,
2930
is_npu_available,
3031
is_sdaa_available,
3132
is_torchao_available,
@@ -68,6 +69,7 @@ def get_cluster_input():
6869
"multi-MLU",
6970
"multi-SDAA",
7071
"multi-MUSA",
72+
"multi-NEURON",
7173
"TPU",
7274
],
7375
_convert_distributed_mode,
@@ -92,6 +94,7 @@ def get_cluster_input():
9294
DistributedType.MULTI_XPU,
9395
DistributedType.MULTI_CPU,
9496
DistributedType.MULTI_HPU,
97+
DistributedType.MULTI_NEURON,
9598
]:
9699
num_machines = _ask_field(
97100
"How many different machines will you use (use more than 1 for multi-node training)? [1]: ",
@@ -218,6 +221,7 @@ def get_cluster_input():
218221
DistributedType.MULTI_MLU,
219222
DistributedType.MULTI_SDAA,
220223
DistributedType.MULTI_MUSA,
224+
DistributedType.MULTI_NEURON,
221225
DistributedType.NO,
222226
]
223227
and not use_mps
@@ -229,6 +233,9 @@ def get_cluster_input():
229233
error_message="Please enter yes or no.",
230234
)
231235
if use_deepspeed:
236+
if distributed_type is DistributedType.MULTI_NEURON:
237+
raise RuntimeError("DeepSpeed is not supported on Neuron devices.")
238+
232239
distributed_type = DistributedType.DEEPSPEED
233240
assert is_deepspeed_available(), (
234241
"DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source"
@@ -376,6 +383,7 @@ def get_cluster_input():
376383
DistributedType.MULTI_MUSA,
377384
DistributedType.MULTI_XPU,
378385
DistributedType.MULTI_HPU,
386+
DistributedType.MULTI_NEURON,
379387
]:
380388
use_fsdp = _ask_field(
381389
"Do you want to use FullyShardedDataParallel? [yes/NO]: ",
@@ -384,7 +392,10 @@ def get_cluster_input():
384392
error_message="Please enter yes or no.",
385393
)
386394
if use_fsdp:
395+
if distributed_type is DistributedType.MULTI_NEURON:
396+
raise NotImplementedError("FSDP is not currently supported on Neuron devices.")
387397
distributed_type = DistributedType.FSDP
398+
388399
if distributed_type == DistributedType.FSDP:
389400
fsdp_config["fsdp_version"] = _ask_options(
390401
"What should be your FSDP version? [2]: ",
@@ -624,10 +635,11 @@ def get_cluster_input():
624635
DistributedType.MULTI_SDAA,
625636
DistributedType.MULTI_MUSA,
626637
DistributedType.MULTI_NPU,
638+
DistributedType.MULTI_NEURON,
627639
DistributedType.XLA,
628640
]:
629641
machine_type = str(distributed_type).split(".")[1].replace("MULTI_", "")
630-
if machine_type == "TPU":
642+
if machine_type in ["TPU", "NEURON"]:
631643
machine_type += " cores"
632644
elif machine_type == "CPU":
633645
machine_type = "processes"
@@ -664,6 +676,7 @@ def get_cluster_input():
664676
DistributedType.MULTI_NPU,
665677
DistributedType.MULTI_XPU,
666678
DistributedType.MULTI_HPU,
679+
DistributedType.MULTI_NEURON,
667680
DistributedType.NO,
668681
]
669682
and not use_cpu
@@ -681,6 +694,8 @@ def get_cluster_input():
681694
machine_type = "XPU(s)"
682695
elif is_hpu_available():
683696
machine_type = "HPU(s)"
697+
elif is_neuron_available():
698+
machine_type = "Neuron cores"
684699
else:
685700
machine_type = "GPU(s)"
686701
gpu_ids = _ask_field(

src/accelerate/commands/config/config_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def _convert_distributed_mode(value):
8181
"MULTI_MLU",
8282
"MULTI_SDAA",
8383
"MULTI_MUSA",
84+
"MULTI_NEURON",
8485
"XLA",
8586
][value]
8687
)

src/accelerate/commands/config/default.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
is_hpu_available,
2323
is_mlu_available,
2424
is_musa_available,
25+
is_neuron_available,
2526
is_npu_available,
2627
is_sdaa_available,
2728
is_xpu_available,
@@ -118,6 +119,14 @@ def write_basic_config(mixed_precision="no", save_location: str = default_json_c
118119
config["distributed_type"] = "MULTI_NPU"
119120
else:
120121
config["distributed_type"] = "NO"
122+
elif is_neuron_available():
123+
num_neuron_cores = torch.neuron.device_count()
124+
config["num_processes"] = num_neuron_cores
125+
config["use_cpu"] = False
126+
if num_neuron_cores > 1:
127+
config["distributed_type"] = "MULTI_NEURON"
128+
else:
129+
config["distributed_type"] = "NO"
121130
else:
122131
num_xpus = 0
123132
config["use_cpu"] = True

src/accelerate/commands/env.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,14 @@
2626
from accelerate import __version__ as version
2727
from accelerate.commands.config import default_config_file, load_config_from_file
2828

29-
from ..utils import is_mlu_available, is_musa_available, is_npu_available, is_sdaa_available, is_xpu_available
29+
from ..utils import (
30+
is_mlu_available,
31+
is_musa_available,
32+
is_neuron_available,
33+
is_npu_available,
34+
is_sdaa_available,
35+
is_xpu_available,
36+
)
3037

3138

3239
def env_command_parser(subparsers=None):
@@ -52,6 +59,7 @@ def env_command(args):
5259
pt_sdaa_available = is_sdaa_available()
5360
pt_musa_available = is_musa_available()
5461
pt_npu_available = is_npu_available()
62+
pt_neuron_available = is_neuron_available()
5563

5664
accelerator = "N/A"
5765
if pt_cuda_available:
@@ -66,6 +74,8 @@ def env_command(args):
6674
accelerator = "MUSA"
6775
elif pt_npu_available:
6876
accelerator = "NPU"
77+
elif pt_neuron_available:
78+
accelerator = "NEURON"
6979

7080
accelerate_config = "Not found"
7181
# Get the default from the config file.
@@ -101,6 +111,8 @@ def env_command(args):
101111
info["SDAA type"] = torch.sdaa.get_device_name()
102112
elif pt_musa_available:
103113
info["MUSA type"] = torch.musa.get_device_name()
114+
elif pt_neuron_available:
115+
info["NEURON type"] = torch.neuron.get_device_name()
104116
elif pt_npu_available:
105117
info["CANN version"] = torch.version.cann
106118

src/accelerate/commands/launch.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
is_hpu_available,
4343
is_mlu_available,
4444
is_musa_available,
45+
is_neuron_available,
4546
is_npu_available,
4647
is_rich_available,
4748
is_sagemaker_available,
@@ -1231,6 +1232,7 @@ def _validate_launch_command(args):
12311232
DistributedType.MULTI_MUSA,
12321233
DistributedType.MULTI_XPU,
12331234
DistributedType.MULTI_HPU,
1235+
DistributedType.MULTI_NEURON,
12341236
)
12351237
else False
12361238
)
@@ -1309,6 +1311,8 @@ def _validate_launch_command(args):
13091311
args.num_processes = torch.npu.device_count()
13101312
elif is_hpu_available():
13111313
args.num_processes = torch.hpu.device_count()
1314+
elif is_neuron_available():
1315+
args.num_processes = torch.neuron.device_count()
13121316
else:
13131317
args.num_processes = torch.cuda.device_count()
13141318
warned.append(f"\t`--num_processes` was set to a value of `{args.num_processes}`")
@@ -1324,6 +1328,7 @@ def _validate_launch_command(args):
13241328
or (is_mlu_available() and torch.mlu.device_count() > 1)
13251329
or (is_sdaa_available() and torch.sdaa.device_count() > 1)
13261330
or (is_musa_available() and torch.musa.device_count() > 1)
1331+
or (is_neuron_available() and torch.neuron.device_count() > 1)
13271332
or (torch.cuda.is_available() and torch.cuda.device_count() > 1)
13281333
)
13291334
):

src/accelerate/local_sgd.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def __init__(self, accelerator: Accelerator, model: torch.nn.Module, local_sgd_s
7575
DistributedType.MULTI_SDAA,
7676
DistributedType.MULTI_MUSA,
7777
DistributedType.MULTI_NPU,
78+
DistributedType.MULTI_NEURON,
7879
]:
7980
raise NotImplementedError("LocalSGD is supported only for CPUs and GPUs (no DeepSpeed or MegatronLM)")
8081
self.enabled = enabled and accelerator.distributed_type != DistributedType.NO

src/accelerate/state.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
is_mlu_available,
4343
is_mps_available,
4444
is_musa_available,
45+
is_neuron_available,
4546
is_npu_available,
4647
is_sdaa_available,
4748
is_torch_xla_available,
@@ -404,6 +405,7 @@ def wait_for_everyone(self):
404405
DistributedType.MULTI_XPU,
405406
DistributedType.MULTI_CPU,
406407
DistributedType.MULTI_HPU,
408+
DistributedType.MULTI_NEURON,
407409
DistributedType.DEEPSPEED,
408410
DistributedType.FSDP,
409411
):
@@ -726,6 +728,7 @@ def default_device(self) -> torch.device:
726728
- MUSA if `is_musa_available()`
727729
- NPU if `is_npu_available()`
728730
- HPU if `is_hpu_available()`
731+
- NEURON if `is_neuron_available()`
729732
- CPU otherwise
730733
"""
731734
if is_mps_available():
@@ -747,6 +750,8 @@ def default_device(self) -> torch.device:
747750
return torch.device("cuda")
748751
elif is_xpu_available():
749752
return torch.device("xpu")
753+
elif is_neuron_available():
754+
return torch.device("neuron")
750755
else:
751756
return torch.device("cpu")
752757

@@ -791,6 +796,9 @@ def _prepare_backend(
791796
if backend is None:
792797
backend = "xccl"
793798
distributed_type = DistributedType.MULTI_XPU
799+
elif is_neuron_available():
800+
backend = "neuron"
801+
distributed_type = DistributedType.MULTI_NEURON
794802

795803
if (
796804
distributed_type is None
@@ -821,7 +829,7 @@ def set_device(self):
821829
self.device = torch.device("cpu") if self._cpu else self.default_device
822830
return
823831
device = str(self.distributed_type).split(".")[-1].replace("MULTI_", "").lower()
824-
if device not in ("cpu", "gpu", "mlu", "musa", "npu", "xpu", "xla", "hpu", "sdaa"):
832+
if device not in ("cpu", "gpu", "mlu", "musa", "npu", "xpu", "xla", "hpu", "sdaa", "neuron"):
825833
raise ValueError(
826834
f"Can't set device for {self.distributed_type} ({device}), verify we should be calling `_set_device()` for it!"
827835
)
@@ -984,6 +992,7 @@ def __init__(
984992
DistributedType.MULTI_NPU,
985993
DistributedType.MULTI_XPU,
986994
DistributedType.MULTI_HPU,
995+
DistributedType.MULTI_NEURON,
987996
]:
988997
# TODO: Siro - remove when axolotl fixes their side
989998
if not os.environ.get("ACCELERATE_ALLOW_CP_STANDALONE", "false").lower() == "true":

0 commit comments

Comments
 (0)