auto_round/export/export_to_autoround/export.py (9 changes: 7 additions & 2 deletions)
@@ -294,9 +294,14 @@ def save_quantized_as_autoround(

     regex_config = quantization_config.pop("regex_config")
     if regex_config is not None:
-        for name in regex_config.keys():
+        for name, cfg in regex_config.items():
             regex_name = to_standard_regex(name)
-            extra_config[regex_name] = {**{k: regex_config[name][k] for k in scheme_keys}}
+            neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
+            if len(neq_keys) > 0:
+                extra_config[regex_name] = {}
+            for key in neq_keys:
+                if cfg.get(key) is not None:
+                    extra_config[regex_name][key] = cfg[key]

     if len(extra_config) > 0:
         quantization_config["extra_config"] = extra_config
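Note: with this change (and the matching edits in the FP8 and NVFP/MXFP exporters below), each regex entry in `extra_config` records only the scheme attributes that differ from the top-level `quantization_config`, instead of a full copy of every scheme key. A minimal, self-contained sketch of that filtering idea, assuming `check_neq_config(cfg, **defaults)`-style behavior reduces to "keep the keys whose values differ from the given defaults" (the real helper lives in auto_round and may differ in detail):

```python
# Illustrative sketch only -- not the auto_round implementation.
# Assumption: the filter keeps keys that are set (not None) and differ from the defaults.

def build_extra_config(regex_config: dict, defaults: dict, scheme_keys: tuple) -> dict:
    extra_config = {}
    for name, cfg in regex_config.items():
        neq_keys = [k for k in scheme_keys if cfg.get(k) is not None and cfg.get(k) != defaults.get(k)]
        if neq_keys:
            extra_config[name] = {k: cfg[k] for k in neq_keys}
    return extra_config


# Only "bits" differs from the defaults, so only "bits" is written out.
defaults = {"bits": 4, "group_size": 128, "sym": True}
regex_config = {".*fc1.*": {"bits": 16, "group_size": 128, "sym": True}}
print(build_extra_config(regex_config, defaults, ("bits", "group_size", "sym")))
# {'.*fc1.*': {'bits': 16}}
```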
auto_round/export/export_to_autoround/export_to_fp8.py (2 changes: 1 addition & 1 deletion)
@@ -188,7 +188,7 @@ def save_quantized_as_autoround(
         neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
         if len(neq_keys) > 0:
             extra_config[layer_name] = {}
-        for key in scheme_keys:
+        for key in neq_keys:
             if cfg[key] is not None:
                 extra_config[layer_name][key] = cfg[key]

auto_round/export/export_to_autoround/export_to_nvfp_mxfp.py (11 changes: 8 additions & 3 deletions)
@@ -210,15 +210,20 @@ def save_quantized_as_fp(
         neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
         if len(neq_keys) > 0:
             extra_config[layer_name] = {}
-        for key in scheme_keys:
+        for key in neq_keys:
             if cfg.get(key, None) is not None:
                 extra_config[layer_name][key] = cfg.get(key, None)

     regex_config = quantization_config.pop("regex_config")
     if regex_config is not None:
-        for name in regex_config.keys():
+        for name, cfg in regex_config.items():
             regex_name = to_standard_regex(name)
-            extra_config[regex_name] = {**{k: regex_config[name][k] for k in scheme_keys}}
+            neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
+            if len(neq_keys) > 0:
+                extra_config[regex_name] = {}
+            for key in neq_keys:
+                if cfg.get(key) is not None:
+                    extra_config[regex_name][key] = cfg[key]

     if len(extra_config) > 0:
         quantization_config["extra_config"] = extra_config
test/test_cpu/quantization/test_mix_bits.py (31 changes: 14 additions & 17 deletions)
@@ -112,7 +112,17 @@ def test_mixed_autoround_format(self, dataloader):
             layer_config=layer_config,
         )
         quantized_model_path = "./saved"
-        compressed_model = autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round")
+        autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round")
+        config_file = Path(quantized_model_path) / "config.json"
+        with open(config_file, "r", encoding="utf-8") as f:
+            config = json.load(f)
+        quant_config = config.get("quantization_config", {})
+        extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "act_bits" not in extra_config[".*fc1.*"].keys()  ## TODO refine this assert
+        assert "group_size" not in extra_config[".*fc1.*"].keys()
+        assert "act_bits" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
+        assert "group_size" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
         model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="cpu")
         assert model.model.decoder.layers[0].self_attn.k_proj.bits == 8
         assert model.model.decoder.layers[0].self_attn.q_proj.bits == 3
@@ -167,33 +177,20 @@ def test_mixed_ar_format_part_name_hf_loading(self, dataloader):
         # remove old extra_config(which contains full name layer configs), only test regex config loading
         new_extra_config = {
             ".*fc1.*": {  # standard regex
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 16,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
             "k_proj": {  # part name
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 8,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
         }
         config_file = Path(quantized_model_path) / "config.json"
         with open(config_file, "r", encoding="utf-8") as f:
             config = json.load(f)
         quant_config = config.get("quantization_config", {})
         old_extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "act_bits" not in old_extra_config[".*fc1.*"].keys()
+        assert "group_size" not in old_extra_config[".*fc1.*"].keys()
         quant_config["extra_config"] = new_extra_config
         config["quantization_config"] = quant_config
         with open(config_file, "w", encoding="utf-8") as f:
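Note: the assertions added in these tests expect the exported `config.json` to carry a slimmed-down `extra_config`, where each entry keeps only the attributes that differ from the top-level scheme, so matching values such as `group_size` or `act_bits` are simply omitted. A hypothetical example of the shape being checked, written as the dict `json.load` would return (field values are illustrative, not taken from an actual export):

```python
# Hypothetical quantization_config fragment (illustrative values only;
# the real file is produced by quantize_and_save).
quantization_config = {
    "bits": 4,
    "group_size": 128,
    "sym": True,
    "extra_config": {
        ".*fc1.*": {"bits": 16},                                  # regex entry, only the differing key
        "model.decoder.layers.0.self_attn.k_proj": {"bits": 8},   # full layer name, only the differing key
    },
}

# Mirrors the kind of check the tests perform.
assert "group_size" not in quantization_config["extra_config"][".*fc1.*"]
```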
test/test_cuda/quantization/test_mix_bits.py (32 changes: 16 additions & 16 deletions)
@@ -108,6 +108,18 @@ def test_mixed_autoround_format(self, tiny_opt_model_path, dataloader):
         )
         quantized_model_path = "self.save_dir"
         autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round")
+        config_file = Path(quantized_model_path) / "config.json"
+        with open(config_file, "r", encoding="utf-8") as f:
+            config = json.load(f)
+        quant_config = config.get("quantization_config", {})
+        extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "act_bits" not in extra_config[".*fc1.*"].keys()  ## TODO refine this assert
+        assert "group_size" not in extra_config[".*fc1.*"].keys()
+        assert "act_bits" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
+        assert "group_size" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
+        assert "group_size" not in extra_config["model.decoder.layers.1.self_attn.q_proj"].keys()
+        assert "bits" in extra_config["model.decoder.layers.1.self_attn.q_proj"].keys()
         model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto")
         assert model.model.decoder.layers[0].self_attn.k_proj.bits == 8
         assert model.model.decoder.layers[0].self_attn.q_proj.bits == 3
@@ -164,33 +176,21 @@ def test_mixed_ar_format_part_name_hf_loading(self, tiny_opt_model_path, dataloader):
         # remove old extra_config(which contains full name layer configs), only test regex config loading
         new_extra_config = {
             ".*fc1.*": {  # standard regex
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 16,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
             "k_proj": {  # part name
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 8,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
         }
         config_file = Path(quantized_model_path) / "config.json"
         with open(config_file, "r", encoding="utf-8") as f:
             config = json.load(f)
         quant_config = config.get("quantization_config", {})
         old_extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "sym" not in old_extra_config[".*fc1.*"].keys()
+        assert "act_dynamic" not in old_extra_config[".*fc1.*"].keys()
+        assert "group_size" not in old_extra_config[".*fc1.*"].keys()
         quant_config["extra_config"] = new_extra_config
         config["quantization_config"] = quant_config
         with open(config_file, "w", encoding="utf-8") as f: