auto_round/export/export_to_autoround/export.py (9 changes: 7 additions & 2 deletions)
@@ -294,9 +294,14 @@ def save_quantized_as_autoround(

     regex_config = quantization_config.pop("regex_config")
     if regex_config is not None:
-        for name in regex_config.keys():
+        for name, cfg in regex_config.items():
             regex_name = to_standard_regex(name)
-            extra_config[regex_name] = {**{k: regex_config[name][k] for k in scheme_keys}}
+            neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
+            if len(neq_keys) > 0:
+                extra_config[regex_name] = {}
+            for key in neq_keys:
+                if cfg.get(key) is not None:
+                    extra_config[regex_name][key] = cfg[key]

     if len(extra_config) > 0:
         quantization_config["extra_config"] = extra_config
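Note: with this change (and the matching edits in the FP8 and NVFP/MXFP exporters below), each regex entry in `extra_config` records only the scheme attributes that differ from the top-level `quantization_config`, instead of a full copy of every scheme key. A minimal, self-contained sketch of that filtering idea, assuming `check_neq_config(cfg, **defaults)`-style behavior reduces to "keep the keys whose values differ from the given defaults" (the real helper lives in auto_round and may differ in detail):

```python
# Illustrative sketch only -- not the auto_round implementation.
# Assumption: the filter keeps keys that are set (not None) and differ from the defaults.

def build_extra_config(regex_config: dict, defaults: dict, scheme_keys: tuple) -> dict:
    extra_config = {}
    for name, cfg in regex_config.items():
        neq_keys = [k for k in scheme_keys if cfg.get(k) is not None and cfg.get(k) != defaults.get(k)]
        if neq_keys:
            extra_config[name] = {k: cfg[k] for k in neq_keys}
    return extra_config


# Only "bits" differs from the defaults, so only "bits" is written out.
defaults = {"bits": 4, "group_size": 128, "sym": True}
regex_config = {".*fc1.*": {"bits": 16, "group_size": 128, "sym": True}}
print(build_extra_config(regex_config, defaults, ("bits", "group_size", "sym")))
# {'.*fc1.*': {'bits': 16}}
```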
auto_round/export/export_to_autoround/export_to_fp8.py (2 changes: 1 addition & 1 deletion)
@@ -188,7 +188,7 @@ def save_quantized_as_autoround(
         neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
         if len(neq_keys) > 0:
             extra_config[layer_name] = {}
-        for key in scheme_keys:
+        for key in neq_keys:
             if cfg[key] is not None:
                 extra_config[layer_name][key] = cfg[key]

auto_round/export/export_to_autoround/export_to_nvfp_mxfp.py (11 changes: 8 additions & 3 deletions)
@@ -210,15 +210,20 @@ def save_quantized_as_fp(
         neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
         if len(neq_keys) > 0:
             extra_config[layer_name] = {}
-        for key in scheme_keys:
+        for key in neq_keys:
             if cfg.get(key, None) is not None:
                 extra_config[layer_name][key] = cfg.get(key, None)

     regex_config = quantization_config.pop("regex_config")
     if regex_config is not None:
-        for name in regex_config.keys():
+        for name, cfg in regex_config.items():
             regex_name = to_standard_regex(name)
-            extra_config[regex_name] = {**{k: regex_config[name][k] for k in scheme_keys}}
+            neq_keys = check_neq_config(cfg, **{k: quantization_config[k] for k in scheme_keys})
+            if len(neq_keys) > 0:
+                extra_config[regex_name] = {}
+            for key in neq_keys:
+                if cfg.get(key) is not None:
+                    extra_config[regex_name][key] = cfg[key]

     if len(extra_config) > 0:
         quantization_config["extra_config"] = extra_config
test/test_cpu/quantization/test_mix_bits.py (31 changes: 14 additions & 17 deletions)
@@ -112,7 +112,17 @@ def test_mixed_autoround_format(self, dataloader):
             layer_config=layer_config,
         )
         quantized_model_path = "./saved"
-        compressed_model = autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round")
+        autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round")
+        config_file = Path(quantized_model_path) / "config.json"
+        with open(config_file, "r", encoding="utf-8") as f:
+            config = json.load(f)
+        quant_config = config.get("quantization_config", {})
+        extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "act_bits" not in extra_config[".*fc1.*"].keys()  ## TODO refine this assert
+        assert "group_size" not in extra_config[".*fc1.*"].keys()
+        assert "act_bits" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
+        assert "group_size" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
         model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="cpu")
         assert model.model.decoder.layers[0].self_attn.k_proj.bits == 8
         assert model.model.decoder.layers[0].self_attn.q_proj.bits == 3
@@ -167,33 +177,20 @@ def test_mixed_ar_format_part_name_hf_loading(self, dataloader):
         # remove old extra_config(which contains full name layer configs), only test regex config loading
         new_extra_config = {
             ".*fc1.*": {  # standard regex
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 16,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
             "k_proj": {  # part name
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 8,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
         }
         config_file = Path(quantized_model_path) / "config.json"
         with open(config_file, "r", encoding="utf-8") as f:
             config = json.load(f)
         quant_config = config.get("quantization_config", {})
         old_extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "act_bits" not in old_extra_config[".*fc1.*"].keys()
+        assert "group_size" not in old_extra_config[".*fc1.*"].keys()
         quant_config["extra_config"] = new_extra_config
         config["quantization_config"] = quant_config
         with open(config_file, "w", encoding="utf-8") as f:
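Note: the assertions added in these tests expect the exported `config.json` to carry a slimmed-down `extra_config`, where each entry keeps only the attributes that differ from the top-level scheme, so matching values such as `group_size` or `act_bits` are simply omitted. A hypothetical example of the shape being checked, written as the dict `json.load` would return (field values are illustrative, not taken from an actual export):

```python
# Hypothetical quantization_config fragment (illustrative values only;
# the real file is produced by quantize_and_save).
quantization_config = {
    "bits": 4,
    "group_size": 128,
    "sym": True,
    "extra_config": {
        ".*fc1.*": {"bits": 16},                                  # regex entry, only the differing key
        "model.decoder.layers.0.self_attn.k_proj": {"bits": 8},   # full layer name, only the differing key
    },
}

# Mirrors the kind of check the tests perform.
assert "group_size" not in quantization_config["extra_config"][".*fc1.*"]
```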
test/test_cuda/quantization/test_mix_bits.py (32 changes: 16 additions & 16 deletions)
@@ -108,6 +108,18 @@ def test_mixed_autoround_format(self, tiny_opt_model_path, dataloader):
         )
         quantized_model_path = "self.save_dir"
         autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round")
+        config_file = Path(quantized_model_path) / "config.json"
+        with open(config_file, "r", encoding="utf-8") as f:
+            config = json.load(f)
+        quant_config = config.get("quantization_config", {})
+        extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "act_bits" not in extra_config[".*fc1.*"].keys()  ## TODO refine this assert
+        assert "group_size" not in extra_config[".*fc1.*"].keys()
+        assert "act_bits" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
+        assert "group_size" not in extra_config["model.decoder.layers.0.self_attn.k_proj"].keys()
+        assert "group_size" not in extra_config["model.decoder.layers.1.self_attn.q_proj"].keys()
+        assert "bits" in extra_config["model.decoder.layers.1.self_attn.q_proj"].keys()
         model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto")
         assert model.model.decoder.layers[0].self_attn.k_proj.bits == 8
         assert model.model.decoder.layers[0].self_attn.q_proj.bits == 3
@@ -164,33 +176,21 @@ def test_mixed_ar_format_part_name_hf_loading(self, tiny_opt_model_path, dataloader):
         # remove old extra_config(which contains full name layer configs), only test regex config loading
         new_extra_config = {
             ".*fc1.*": {  # standard regex
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 16,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
             "k_proj": {  # part name
-                "act_bits": 16,
-                "act_data_type": "float",
-                "act_dynamic": True,
-                "act_group_size": 128,
-                "act_sym": True,
                 "bits": 8,
-                "data_type": "int",
-                "group_size": 128,
-                "sym": True,
             },
         }
         config_file = Path(quantized_model_path) / "config.json"
         with open(config_file, "r", encoding="utf-8") as f:
             config = json.load(f)
         quant_config = config.get("quantization_config", {})
         old_extra_config = quant_config.get("extra_config", {})
+        # check extra_config only saved attributes differing from Scheme values
+        assert "sym" not in old_extra_config[".*fc1.*"].keys()
+        assert "act_dynamic" not in old_extra_config[".*fc1.*"].keys()
+        assert "group_size" not in old_extra_config[".*fc1.*"].keys()
         quant_config["extra_config"] = new_extra_config
         config["quantization_config"] = quant_config
         with open(config_file, "w", encoding="utf-8") as f: