review: extend and use common dtype helper

dacorvo · dacorvo · commit dbe544f645c4 · 2025-05-13T17:00:06.000+02:00
diff --git a/optimum/neuron/models/inference/nxd/backend/config.py b/optimum/neuron/models/inference/nxd/backend/config.py
@@ -18,24 +18,12 @@
 import torch
 
 from .....configuration_utils import NeuronConfig, register_neuron_config
+from .....utils import map_torch_dtype
 
 
 NEURON_CONFIG_FILE = "neuron_config.json"
 
 
-def to_torch_dtype(dtype_str: str) -> torch.dtype:
-    dtype_mapping = {
-        "float32": torch.float32,
-        "float16": torch.float16,
-        "bfloat16": torch.bfloat16,
-        "fp32": torch.float32,
-        "fp16": torch.float16,
-        "bf16": torch.bfloat16,
-    }
-    assert dtype_str in dtype_mapping, f"Unsupported dtype: {dtype_str}"
-    return dtype_mapping[dtype_str]
-
-
 def to_dict(obj):
     if type(obj) is dict:
         return {k: to_dict(v) for k, v in obj.items()}
@@ -131,15 +119,15 @@ def __init__(
         self.tp_degree = tp_degree
         self.torch_dtype = torch_dtype
         if isinstance(self.torch_dtype, str):
-            self.torch_dtype = to_torch_dtype(self.torch_dtype)
+            self.torch_dtype = map_torch_dtype(self.torch_dtype)
         self.n_active_tokens = self.sequence_length if n_active_tokens is None else n_active_tokens
         self.output_logits = output_logits
 
         self.padding_side = padding_side
 
         self.rpl_reduce_dtype = torch_dtype if rpl_reduce_dtype is None else rpl_reduce_dtype
         if isinstance(self.rpl_reduce_dtype, str):
-            self.rpl_reduce_dtype = to_torch_dtype(self.rpl_reduce_dtype)
+            self.rpl_reduce_dtype = map_torch_dtype(self.rpl_reduce_dtype)
 
         # fallback to sequence_length is for compatibility with vllm
         self.max_context_length = max_context_length
diff --git a/optimum/neuron/utils/misc.py b/optimum/neuron/utils/misc.py
@@ -631,6 +631,9 @@ def map_torch_dtype(dtype: Union[str, torch.dtype]):
         "float64": torch.float64,
         "int32": torch.int32,
         "int64": torch.int64,
+        "bf16": torch.bfloat16,
+        "fp16": torch.float16,
+        "fp32": torch.float32,
     }
 
     if isinstance(dtype, str) and dtype in dtype_mapping:
diff --git a/tests/decoder/test_decoder_export.py b/tests/decoder/test_decoder_export.py
@@ -19,8 +19,8 @@
 from transformers import AutoModelForCausalLM
 
 from optimum.neuron import NeuronModelForCausalLM
-from optimum.neuron.models.inference.nxd.backend.config import to_torch_dtype
 from optimum.neuron.models.inference.nxd.llama.modeling_llama import LlamaNxDModelForCausalLM
+from optimum.neuron.utils import map_torch_dtype
 from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx
 
 
@@ -53,7 +53,7 @@ def check_neuron_model(neuron_model, batch_size=None, sequence_length=None, num_
         if hasattr(neuron_config, "auto_cast_type"):
             assert neuron_config.auto_cast_type == auto_cast_type
         elif hasattr(neuron_config, "torch_dtype"):
-            assert neuron_config.torch_dtype == to_torch_dtype(auto_cast_type)
+            assert neuron_config.torch_dtype == map_torch_dtype(auto_cast_type)
 
 
 def _test_decoder_export_save_reload(

Original file line number	Diff line number	Diff line change
`@@ -631,6 +631,9 @@ def map_torch_dtype(dtype: Union[str, torch.dtype]):`
`631`	`631`	`"float64": torch.float64,`
`632`	`632`	`"int32": torch.int32,`
`633`	`633`	`"int64": torch.int64,`
	`634`	`+ "bf16": torch.bfloat16,`
	`635`	`+ "fp16": torch.float16,`
	`636`	`+ "fp32": torch.float32,`
`634`	`637`	`}`
`635`	`638`
`636`	`639`	`if isinstance(dtype, str) and dtype in dtype_mapping:`