Skip to content

Commit 82f0b97

Browse files
jenchen13 and claude committed
Store raw hf_quant_config and convert at config.json write time
Restore `self._hf_quant_config` to hold the raw modelopt-native schema (matching upstream main) and call `convert_hf_quant_config_format` inline when writing `config.json["quantization_config"]` for newer vLLM. Drops the temporary `raw_hf_quant_config` variable and trims the post-write region to match main more closely.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 4754fb0 commit 82f0b97

1 file changed

Lines changed: 5 additions & 7 deletions

File tree

modelopt/torch/export/unified_export_megatron.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -346,19 +346,15 @@ def save_pretrained(
346346
if gathered_kv_cache_dtype is not None:
347347
quantization_config["kv_cache_quant_algo"] = gathered_kv_cache_dtype
348348

349-
raw_hf_quant_config = {
349+
self._hf_quant_config = {
350350
"producer": {
351351
"name": "modelopt",
352352
"version": __version__,
353353
},
354354
"quantization": quantization_config,
355355
}
356-
# hf_quant_config.json keeps the raw modelopt-native schema for legacy
357-
# consumers; config.json["quantization_config"] gets the vLLM-facing
358-
# converted schema below.
359-
self._hf_quant_config = convert_hf_quant_config_format(raw_hf_quant_config)
360356
with open(save_directory + "/hf_quant_config.json", "w") as f:
361-
json.dump(raw_hf_quant_config, f, indent=4)
357+
json.dump(self._hf_quant_config, f, indent=4)
362358

363359
# Add multimodal components to state_dict. Since only support decoder model quantization,
364360
# no changes will be made to the multimodal components. We copy the multimodal components
@@ -378,7 +374,9 @@ def save_pretrained(
378374
if self._hf_quant_config and os.path.exists(config_json_file):
379375
with open(config_json_file) as f:
380376
config_dict = json.load(f)
381-
config_dict["quantization_config"] = self._hf_quant_config
377+
config_dict["quantization_config"] = convert_hf_quant_config_format(
378+
self._hf_quant_config
379+
)
382380
with open(config_json_file, "w") as f:
383381
json.dump(config_dict, f, indent=4)
384382

0 commit comments

Comments
 (0)