|
13 | 13 | from torch.nn import Module |
14 | 14 | import torch.nn as nn |
15 | 15 | from vllm.model_executor.layers.linear import LinearBase |
16 | | -from vllm.model_executor.layers.linear import LinearMethodBase |
17 | 16 | from vllm.model_executor.layers.linear import MergedColumnParallelLinear |
18 | 17 | from vllm.model_executor.layers.linear import QKVParallelLinear |
19 | 18 | from vllm.model_executor.layers.linear import RowParallelLinear |
|
23 | 22 | from vllm.model_executor.layers.quantization.base_config import QuantizationConfig |
24 | 23 |
|
25 | 24 | import brevitas.config as config |
26 | | -from brevitas.export.inference.handler import DynamicFloatInferenceHandler |
27 | | -from brevitas.export.inference.handler import DynamicIntInferenceHandler |
28 | | -from brevitas.export.inference.handler import FloatInferencetHandler |
29 | | -from brevitas.export.inference.handler import FloatWeightInferencetHandler |
30 | | -from brevitas.export.inference.handler import GroupwiseFloatInferenceHandler |
31 | | -from brevitas.export.inference.handler import GroupwiseFloatWeightInferenceHandler |
32 | | -from brevitas.export.inference.handler import GroupwiseIntInferenceHandler |
33 | | -from brevitas.export.inference.handler import GroupwiseIntWeightInferenceHandler |
34 | | -from brevitas.export.inference.handler import IntInferencetHandler |
35 | | -from brevitas.export.inference.handler import IntWeightInferencetHandler |
36 | 25 | from brevitas.export.inference.vLLM.handler import QuantLinear |
37 | | -from brevitas.export.manager import _set_proxy_export_handler |
38 | | -from brevitas.export.manager import _set_proxy_export_mode |
39 | | -from brevitas.export.manager import _set_recurrent_layer_export_handler |
40 | | -from brevitas.export.manager import _set_recurrent_layer_export_mode |
41 | | -from brevitas.export.manager import BaseManager |
42 | | -from brevitas.graph.calibrate import QuantizationStatusManager |
43 | | -from brevitas.nn.equalized_layer import EqualizedModule |
44 | 26 | from brevitas.nn.equalized_layer import RotatedModule |
45 | 27 | from brevitas.nn.mixin import QuantLayerMixin |
46 | 28 | from brevitas.proxy.quant_proxy import QuantProxyFromInjector |
@@ -178,8 +160,9 @@ def export(self, model, filepath): |
178 | 160 | proxy_dict['class_type'] = export_handler.__class__.__name__ |
179 | 161 | if isinstance(module, self.wrap_layers): |
180 | 162 | layer_dict['rotation_config'] = dict() |
181 | | - layer_dict['rotation_config']['rot_mat_shape'] = module.had_mat.shape[0] if module.had_mat is not None else None |
| 163 | + layer_dict['rotation_config']['rot_mat_shape'] = module.had_mat.shape[ |
| 164 | + 0] if module.had_mat is not None else None |
182 | 165 | layer_dict['rotation_config']['k'] = module.k |
183 | | - |
| 166 | + |
184 | 167 | with open(json_filename, 'w') as f: |
185 | 168 | json.dump(json_to_save, f, cls=EncodeTensor) |
0 commit comments