@@ -197,12 +197,6 @@ def test_sq_save_load_with_quantize_API(self):
197
197
q_model = quantize (fp32_model , quant_config = quant_config , run_fn = run_fn , example_inputs = example_inputs )
198
198
assert q_model is not None , "Quantization failed!"
199
199
q_model .save ("saved_results" )
200
-
201
- quant_config .folding = True
202
- fp32_model = copy .deepcopy (model )
203
- q_model = quantize (fp32_model , quant_config = quant_config , run_fn = run_fn , example_inputs = example_inputs )
204
- assert q_model is not None , "Quantization failed!"
205
- q_model .save ("saved_results" )
206
200
inc_out = q_model (example_inputs )
207
201
208
202
from neural_compressor .torch .algorithms .smooth_quant import recover_model_from_json
@@ -216,6 +210,7 @@ def test_sq_save_load_with_quantize_API(self):
216
210
217
211
# compare saved json file
218
212
fp32_model = copy .deepcopy (model )
213
+ # Recovering from the saved JSON is not supported when quant_config.folding = True, because folding updates the model weights.
219
214
loaded_model = recover_model_from_json (fp32_model , "saved_results/qconfig.json" , example_inputs = example_inputs )
220
215
loaded_out = loaded_model (example_inputs )
221
216
assert torch .allclose (inc_out , loaded_out , atol = 1e-05 ), "Unexpected result. Please double check."
0 commit comments