
Commit daa5174

use default dynamic quantization group size (#3495)
### Changes

- Removed the explicit disabling of dynamic quantization
- Updated test durations from the latest job

### Reason for changes

Test the default configuration.

### Related tickets

157896

### Tests

- No change in example tests: https://github.com/openvinotoolkit/nncf/actions/runs/15041910960
- No change in conformance PTWC: https://github.com/openvinotoolkit/nncf/actions/runs/15041905952
1 parent 955a2b9 commit daa5174
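In effect, every example and test that previously pinned DYNAMIC_QUANTIZATION_GROUP_SIZE to "0" (which disables runtime dynamic quantization) now relies on the OpenVINO runtime default. A minimal before/after sketch of the pattern, with a placeholder model directory:

from optimum.intel import OVModelForCausalLM

# Before: dynamic quantization explicitly disabled via the group size property.
model = OVModelForCausalLM.from_pretrained(
    "model_dir",  # placeholder path
    ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0", "KV_CACHE_PRECISION": "f16"},
)

# After: the property is omitted, so the runtime's default group size applies.
model = OVModelForCausalLM.from_pretrained("model_dir", ov_config={"KV_CACHE_PRECISION": "f16"})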

8 files changed (+37, -42 lines)


examples/llm_compression/openvino/smollm2_360m_fp8/main.py

Lines changed: 1 addition & 3 deletions
@@ -116,9 +116,7 @@ def main():
     model.save_pretrained(OUTPUT_DIR)
     tokenizer.save_pretrained(OUTPUT_DIR)

-    model = OVModelForCausalLM.from_pretrained(
-        OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0", "INFERENCE_PRECISION_HINT": "f32"}
-    )
+    model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"INFERENCE_PRECISION_HINT": "f32"})
     answers_by_questions = generate_answers(questions, model, tokenizer)
     print(f"Optimized model outputs:\n{answers_by_questions}\n")
     return answers_by_questions

examples/llm_compression/openvino/tiny_llama/main.py

Lines changed: 1 addition & 3 deletions
@@ -57,9 +57,7 @@ def transform_fn(data, tokenizer):
     )
     model.save_pretrained(OUTPUT_DIR)

-    model = OVModelForCausalLM.from_pretrained(
-        OUTPUT_DIR, ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0", "KV_CACHE_PRECISION": "f16"}
-    )
+    model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, ov_config={"KV_CACHE_PRECISION": "f16"})
     input_ids = tokenizer("What is PyTorch?", return_tensors="pt").to(device=model.device)

     start_t = time.time()

examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py

Lines changed: 0 additions & 1 deletion
@@ -248,7 +248,6 @@ def main():
         "PERFORMANCE_HINT": "LATENCY",
         "NUM_STREAMS": "1",
         "CACHE_DIR": "",
-        "DYNAMIC_QUANTIZATION_GROUP_SIZE": "0",
         "KV_CACHE_PRECISION": "f16",
     }
     model = OVModelForCausalLM.from_pretrained(
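For context, a sketch of the full ov_config this example now builds, reconstructed from the keys visible in the hunk above (the model path is a placeholder):

from optimum.intel import OVModelForCausalLM

ov_config = {
    "PERFORMANCE_HINT": "LATENCY",  # optimize for per-request latency
    "NUM_STREAMS": "1",             # single inference stream
    "CACHE_DIR": "",                # no on-disk compiled-model cache
    "KV_CACHE_PRECISION": "f16",    # keep the KV cache in f16
    # DYNAMIC_QUANTIZATION_GROUP_SIZE is no longer set; the default applies.
}
model = OVModelForCausalLM.from_pretrained("model_dir", ov_config=ov_config)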

examples/llm_compression/torch/qat_with_lora/main.py

Lines changed: 1 addition & 1 deletion
@@ -272,7 +272,7 @@ def export_to_openvino(pretrained: str, ckpt_file: Path, ir_dir: Path) -> OVMode
         trust_remote_code=True,
         load_in_8bit=False,
         compile=True,
-        ov_config={"KV_CACHE_PRECISION": "f16", "DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"},
+        ov_config={"KV_CACHE_PRECISION": "f16"},
     )
Lines changed: 18 additions & 18 deletions
@@ -1,20 +1,20 @@
 {
-    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression]": 222.974,
-    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression_synthetic]": 873.780,
-    "tests/cross_fw/examples/test_examples.py::test_examples[llm_tune_params]": 1018.932,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_onnx_mobilenet_v2]": 178.509,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_onnx_yolo8_quantize_with_accuracy_control]": 292.766,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_anomaly_stfpm_quantize_with_accuracy_control]": 443.025,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_mobilenet_v2_quantize]": 169.789,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_yolo8_quantize]": 170.593,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_yolo8_quantize_with_accuracy_control]": 205.533,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_tensorflow_mobilenet_v2]": 149.202,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_mobilenet_v2]": 192.227,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_ssd300_vgg16]": 231.613,
-    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_torch_anomalib]": 478.797,
-    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_torch_resnet18]": 1251.144,
-    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_fx_resnet18]": 412.243,
-    "tests/cross_fw/examples/test_examples.py::test_examples[fp8_llm_quantization]": 229.69,
-    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_tensorflow_mobilenet_v2]": 1500.00,
-    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression_qat_with_lora]": 665
+    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression]": 197,
+    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression_synthetic]": 717,
+    "tests/cross_fw/examples/test_examples.py::test_examples[llm_tune_params]": 824,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_onnx_mobilenet_v2]": 207,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_onnx_yolo8_quantize_with_accuracy_control]": 800,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_anomaly_stfpm_quantize_with_accuracy_control]": 455,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_mobilenet_v2_quantize]": 171,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_yolo8_quantize]": 192,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_yolo8_quantize_with_accuracy_control]": 203,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_tensorflow_mobilenet_v2]": 148,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_mobilenet_v2]": 185,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_ssd300_vgg16]": 221,
+    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_torch_anomalib]": 547,
+    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_torch_resnet18]": 1189,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_fx_resnet18]": 178,
+    "tests/cross_fw/examples/test_examples.py::test_examples[fp8_llm_quantization]": 226,
+    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_tensorflow_mobilenet_v2]": 891,
+    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression_qat_with_lora]": 591
 }
Lines changed: 14 additions & 14 deletions
@@ -1,17 +1,17 @@
 {
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_awq_backup_mode_none_backend_OV]": 269,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_awq_scale_estimation_backend_OV]": 421,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_awq_scale_estimation_stateful_backend_OV]": 374,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_awq_stateful_backend_OV]": 243,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_backend_OV]": 190,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_gptq_scale_estimation_stateful_backend_OV]": 1463,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_lora_stateful_backend_OV]": 483,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_awq_backup_mode_none_backend_OV]": 249,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_awq_scale_estimation_backend_OV]": 368,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_awq_scale_estimation_stateful_backend_OV]": 371,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_awq_stateful_backend_OV]": 205,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_backend_OV]": 206,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_gptq_scale_estimation_stateful_backend_OV]": 1161,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_aware_lora_stateful_backend_OV]": 473,
     "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_free_backend_FP32]": 0,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_free_backend_OV]": 196,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_int4_data_free_backend_TORCH]": 133,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_int8_data_free_backend_TORCH]": 154,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_NF4_scale_estimation_stateful_per_channel_backend_OV]": 256,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_scale_estimation_per_channel_backend_OV]": 258,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_free_awq_backend_OV]": 200,
-    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_free_awq_backend_TORCH]": 200
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_free_backend_OV]": 187,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_int4_data_free_backend_TORCH]": 123,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_int8_data_free_backend_TORCH]": 165,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_NF4_scale_estimation_stateful_per_channel_backend_OV]": 193,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_scale_estimation_per_channel_backend_OV]": 251,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_free_awq_backend_OV]": 164,
+    "tests/post_training/test_quantize_conformance.py::test_weight_compression[tinyllama_data_free_awq_backend_TORCH]": 210
 }

tests/post_training/pipelines/lm_weight_compression.py

Lines changed: 1 addition & 1 deletion
@@ -366,7 +366,7 @@ def _validate(self) -> None:
             load_in_8bit=False,
             compile=False,
             stateful=is_stateful,
-            ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0", "KV_CACHE_PRECISION": "f16"},
+            ov_config={"KV_CACHE_PRECISION": "f16"},
         )
         if self.backend == BackendType.FX_TORCH:
             compressed_model_hf = FXAutoModelForCausalLM(self.model, self.model_config)
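If a run needs the previous behavior (for example, to rule out dynamic quantization when comparing metrics), the property can still be passed explicitly; a hedged sketch with a placeholder path:

from optimum.intel import OVModelForCausalLM

# Setting the group size to "0" disables runtime dynamic quantization,
# restoring the configuration these tests used before this commit.
model = OVModelForCausalLM.from_pretrained(
    "model_dir",  # placeholder
    ov_config={"DYNAMIC_QUANTIZATION_GROUP_SIZE": "0", "KV_CACHE_PRECISION": "f16"},
)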

tests/torch2/function_hook/quantization/test_fq_lora.py

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ def get_ov_model(model: AutoModelForCausalLM, tmp_path: str) -> OVModelForCausal
         trust_remote_code=True,
         load_in_8bit=False,
         compile=True,
-        ov_config={"KV_CACHE_PRECISION": "f16", "DYNAMIC_QUANTIZATION_GROUP_SIZE": "0"},
+        ov_config={"KV_CACHE_PRECISION": "f16"},
     )