
Commit c0a4484

Temp fix

1 parent e872263 commit c0a4484

9 files changed, +56 -22 lines changed

src/brevitas/graph/base.py

Lines changed: 1 addition & 0 deletions
@@ -232,6 +232,7 @@ def init_new_module(self, old_module: Module, name: str = None, load_state_dict:
         is_assign_supported = 'assign' in inspect.signature(
             old_module.load_state_dict).parameters.keys()
         if 'device' in new_module_signature_keys and is_assign_supported and load_state_dict:
+            new_kwargs['quant_device'] = new_kwargs['device']
             new_kwargs['device'] = torch.device("meta")

         # init the new module
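
For context, a minimal sketch of the hand-off this one-line addition sets up (`new_kwargs` and `quant_device` are the names used in the hunk; the concrete device and dtype values below are illustrative, not from the commit): the real target device is stashed under `quant_device` before `device` is overwritten with `meta`, so it can be recovered later when the quantization parameters are materialized.

import torch

# Illustrative stand-in for the kwargs assembled inside init_new_module.
new_kwargs = {'device': torch.device('cuda:0'), 'dtype': torch.float16}

# Pattern added by this commit: remember the real device, then instantiate on 'meta'
# so the replacement module allocates no real weight memory during construction.
new_kwargs['quant_device'] = new_kwargs['device']
new_kwargs['device'] = torch.device('meta')
print(new_kwargs)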

src/brevitas/nn/mixin/base.py

Lines changed: 10 additions & 3 deletions
@@ -47,9 +47,16 @@ def __init__(

         # `device` and `dtype` are special keywords, propagated through no matter the prefix
         # `None` is the default value for these parameters when they are not specified
-        special_keys = ['device', 'dtype']
-        for key in special_keys:
-            filtered_kwargs[key] = kwargs.get(key, None)
+
+        # When applying quantization, if we use `meta` device to avoid memory duplication, we need
+        # to keep track of the original device for the eventual quantization parameters
+        # If not, they would be stuck in `meta` and error out
+        device = kwargs.get('device') if kwargs.get('device') not in [
+            'meta', torch.device('meta')] else kwargs.get('quant_device')
+        dtype = kwargs.get('dtype')
+        filtered_kwargs['device'] = device
+        filtered_kwargs['dtype'] = dtype
+
         if quant is None:
             quant_injector = none_quant_injector.let(**filtered_kwargs)
             quant = quant_injector.proxy_class(self, quant_injector)
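
The recovery side of the same mechanism can be sketched as a standalone helper (the function name `resolve_quant_device` is hypothetical; in the commit the logic lives inline in the mixin's `__init__` as shown above): a `meta` device is swapped back for the remembered `quant_device`, anything else passes through unchanged.

from typing import Optional

import torch


def resolve_quant_device(kwargs: dict) -> Optional[torch.device]:
    # Hypothetical helper mirroring the inline expression in the hunk above.
    device = kwargs.get('device')
    if device in ['meta', torch.device('meta')]:
        # The module itself lives on 'meta'; quant parameters need the real device.
        return kwargs.get('quant_device')
    return device


print(resolve_quant_device({'device': torch.device('meta'), 'quant_device': torch.device('cpu')}))  # cpu
print(resolve_quant_device({'device': torch.device('cpu')}))  # cpu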

src/brevitas/nn/quant_conv.py

Lines changed: 12 additions & 6 deletions
@@ -45,9 +45,9 @@ def __init__(
             input_quant: Optional[ActQuantType] = None,
             output_quant: Optional[ActQuantType] = None,
             return_quant_tensor: bool = False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         # avoid an init error in the super class by setting padding to 0
         if padding_mode == 'zeros' and padding == 'same' and (stride > 1 if isinstance(
                 stride, int) else any(map(lambda x: x > 1, stride))):
@@ -75,6 +75,8 @@ def __init__(
             input_quant=input_quant,
             output_quant=output_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)
         self.is_same_padded_strided = is_same_padded_strided

@@ -132,9 +134,9 @@ def __init__(
             input_quant: Optional[ActQuantType] = None,
             output_quant: Optional[ActQuantType] = None,
             return_quant_tensor: bool = False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         # avoid an init error in the super class by setting padding to 0
         if padding_mode == 'zeros' and padding == 'same' and (stride > 1 if isinstance(
                 stride, int) else any(map(lambda x: x > 1, stride))):
@@ -162,6 +164,8 @@ def __init__(
             input_quant=input_quant,
             output_quant=output_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)
         self.is_same_padded_strided = is_same_padded_strided

@@ -221,9 +225,9 @@ def __init__(
             input_quant: Optional[ActQuantType] = None,
             output_quant: Optional[ActQuantType] = None,
             return_quant_tensor: bool = False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         # avoid an init error in the super class by setting padding to 0
         if padding_mode == 'zeros' and padding == 'same' and (stride > 1 if isinstance(
                 stride, int) else any(map(lambda x: x > 1, stride))):
@@ -251,6 +255,8 @@ def __init__(
             input_quant=input_quant,
             output_quant=output_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)
         self.is_same_padded_strided = is_same_padded_strided

src/brevitas/nn/quant_convtranspose.py

Lines changed: 12 additions & 6 deletions
@@ -50,9 +50,9 @@ def __init__(
             input_quant: Optional[ActQuantType] = None,
             output_quant: Optional[ActQuantType] = None,
             return_quant_tensor: bool = False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         ConvTranspose1d.__init__(
             self,
             in_channels=in_channels,
@@ -74,6 +74,8 @@ def __init__(
             input_quant=input_quant,
             output_quant=output_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)
         self._output_size = None

@@ -145,9 +147,9 @@ def __init__(
             input_quant: Optional[ActQuantType] = None,
             output_quant: Optional[ActQuantType] = None,
             return_quant_tensor: bool = False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         ConvTranspose2d.__init__(
             self,
             in_channels=in_channels,
@@ -169,6 +171,8 @@ def __init__(
             input_quant=input_quant,
             output_quant=output_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)
         self._output_size = None

@@ -240,9 +244,9 @@ def __init__(
             input_quant: Optional[ActQuantType] = None,
             output_quant: Optional[ActQuantType] = None,
             return_quant_tensor: bool = False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         ConvTranspose3d.__init__(
             self,
             in_channels=in_channels,
@@ -264,6 +268,8 @@ def __init__(
             input_quant=input_quant,
             output_quant=output_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)
         self._output_size = None

src/brevitas/nn/quant_embedding.py

Lines changed: 4 additions & 3 deletions
@@ -32,9 +32,9 @@ def __init__(
             _weight: Optional[Tensor] = None,
             weight_quant: WeightQuantType = Int8WeightPerTensorFloat,
             return_quant_tensor=False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         Embedding.__init__(
             self,
             num_embeddings=num_embeddings,
@@ -47,7 +47,8 @@ def __init__(
             _weight=_weight,
             device=device,
             dtype=dtype)
-        QuantWeightMixin.__init__(self, weight_quant=weight_quant, **kwargs)
+        QuantWeightMixin.__init__(
+            self, weight_quant=weight_quant, device=device, dtype=dtype, **kwargs)
         self.accept_quant_tensor = False
         self.return_quant_tensor = return_quant_tensor

src/brevitas/nn/quant_linear.py

Lines changed: 4 additions & 2 deletions
@@ -35,9 +35,9 @@ def __init__(
             input_quant: Optional[ActQuantType] = None,
             output_quant: Optional[ActQuantType] = None,
             return_quant_tensor: bool = False,
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
             **kwargs) -> None:
-        device = kwargs.get('device', None)
-        dtype = kwargs.get('dtype', None)
         Linear.__init__(self, in_features, out_features, bias, device=device, dtype=dtype)
         QuantWBIOL.__init__(
             self,
@@ -46,6 +46,8 @@ def __init__(
             input_quant=input_quant,
             output_quant=output_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)

     @property
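
With `device` and `dtype` promoted from `kwargs.get(...)` to explicit keyword arguments, a call such as the following (a hypothetical usage sketch, not part of the commit) forwards them both to `Linear.__init__` and to `QuantWBIOL.__init__`, and from there to the quantizer injector:

import torch

from brevitas.nn import QuantLinear

# Hypothetical usage: device and dtype now reach the float layer and the quant machinery alike.
layer = QuantLinear(16, 32, bias=True, device=torch.device('cpu'), dtype=torch.float32)
print(layer.weight.device, layer.weight.dtype)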

src/brevitas/nn/quant_rnn.py

Lines changed: 8 additions & 0 deletions
@@ -845,6 +845,8 @@ def __init__(
             gate_acc_quant=Int8ActPerTensorFloat,
             shared_input_hidden_weights=False,
             return_quant_tensor: bool = False,
+            dtype: Optional[torch.dtype] = None,
+            device: Optional[torch.device] = None,
             **kwargs):
         super(QuantRNN, self).__init__(
             layer_impl=_QuantRNNLayer,
@@ -861,6 +863,8 @@ def __init__(
             gate_acc_quant=gate_acc_quant,
             shared_input_hidden_weights=shared_input_hidden_weights,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)


@@ -888,6 +892,8 @@ def __init__(
             shared_intra_layer_gate_acc_quant=False,
             shared_cell_state_quant=True,
             return_quant_tensor: bool = False,
+            dtype: Optional[torch.dtype] = None,
+            device: Optional[torch.device] = None,
             **kwargs):
         super(QuantLSTM, self).__init__(
             layer_impl=_QuantLSTMLayer,
@@ -910,6 +916,8 @@ def __init__(
             shared_intra_layer_gate_acc_quant=shared_intra_layer_gate_acc_quant,
             shared_cell_state_quant=shared_cell_state_quant,
             return_quant_tensor=return_quant_tensor,
+            device=device,
+            dtype=dtype,
             **kwargs)
         if cat_output_cell_states and cell_state_quant is not None and not shared_cell_state_quant:
             raise RuntimeError("Concatenating cell states requires shared cell quantizers.")

src/brevitas/nn/quant_sdpa.py

Lines changed: 5 additions & 1 deletion
@@ -142,8 +142,12 @@ def __init__(
         self.pre_process_k = pre_process_k
         self.pre_process_v = pre_process_v

+        special_keys = ['device', 'dtype', 'quant_dtype']
+
         def filter_kwargs(prefix):
-            return {k[len(prefix):]: v for k, v in kwargs.items() if k.startswith(prefix)}
+            return {
+                k[len(prefix):]: v for k,
+                v in kwargs.items() if k.startswith(prefix) or k in special_keys}

         self.q_scaled_quant = QuantIdentity(act_quant=q_scaled_quant, **filter_kwargs('q_scaled_'))
         self.k_transposed_quant = QuantIdentity(
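
A self-contained sketch of the updated filtering (the kwargs dict below is hypothetical; `filter_kwargs` and `special_keys` are the names from the hunk): prefixed keys are stripped of their prefix as before, and keys listed in `special_keys` now pass the filter for every sub-quantizer as well, although they too are sliced by `len(prefix)`.

special_keys = ['device', 'dtype', 'quant_dtype']

# Hypothetical kwargs a caller might pass to the quantized SDPA layer.
kwargs = {'q_scaled_bit_width': 8, 'device': 'cpu', 'v_bit_width': 4}


def filter_kwargs(prefix):
    # Same logic as the hunk above: keep keys carrying the prefix or listed as special,
    # then drop the first len(prefix) characters of every surviving key.
    return {
        k[len(prefix):]: v for k, v in kwargs.items()
        if k.startswith(prefix) or k in special_keys}


print(filter_kwargs('q_scaled_'))
# -> {'bit_width': 8, '': 'cpu'}  (the special key's name is shortened by the slice too)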

src/brevitas/quant/solver/weight.py

Lines changed: 0 additions & 1 deletion
@@ -104,7 +104,6 @@ class WeightQuantSolver(SolveStatsReduceDimFromEnum,
                         SolveParameterScalingShape,
                         SolveWeightScalingPerOutputChannelShapeFromModule,
                         SolveWeightTensorQuantFromEnum,
-                        SolveDtypeDeviceFromTrackedParameterList,
                         SolveInputViewImpl):
     """
     Translate enum and shape directives to weight-specific quantization core modules.
