````diff
@@ -13,18 +13,6 @@ pip install auto-round==0.9.3
 pip install -r requirements.txt
 ```
 
-**Before neural-compressor v3.7 and auto-round v0.9.1 release, please install from source for the latest updates:**
-
-```bash
-# neural-compressor-pt
-INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@master
-# auto-round
-pip install git+https://github.com/intel/[email protected]
-# other requirements
-pip install -r requirements.txt
-```
-
-
 ## Quantization
 
 ### Demo (`MXFP4`, `MXFP8`, `NVFP4`, `uNVFP4`)
@@ -92,7 +80,8 @@ Here we provide several recipes for Llama3 models. The relative accuracy loss of

 #### Llama 3.1 8B MXFP8
 
-AutoRound tuning helps improve the accuracy, `iters` and `nsamples` is higher than default.
+RTN (Round-to-Nearest) is sufficient to preserve accuracy here, so AutoRound tuning is disabled (`iters=0`).
 
 ```bash
 # Quantize and export AutoRound format
 CUDA_VISIBLE_DEVICES=0 bash run_quant.sh --topology=Llama-3.1-8B --dtype=mxfp8 --input_model=/models/Meta-Llama-3.1-8B-Instruct --output_model=Llama-3.1-8B-MXFP8
````
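The recipe change above swaps AutoRound tuning (`iters 1000`, `nsamples 512`) for plain RTN (`iters 0`), since round-to-nearest needs no calibration-driven optimization for this model. As a minimal sketch of what that setting means at the API level — assuming auto-round's `AutoRound` class with `scheme` and `iters` parameters as in recent releases; the example actually drives this through `quantize.py`:

```python
# Minimal RTN sketch with auto-round: iters=0 skips the tuning loop,
# so weights are quantized by straight round-to-nearest.
from auto_round import AutoRound

model_path = "/models/Meta-Llama-3.1-8B-Instruct"  # model path from the recipe above
autoround = AutoRound(
    model_path,
    scheme="MXFP8",  # assumed scheme spelling; check your auto-round version
    iters=0,         # 0 tuning iterations == RTN
)
# Export in AutoRound format, mirroring the recipe's output directory
autoround.quantize_and_save("Llama-3.1-8B-MXFP8", format="auto_round")
```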

**requirements.txt**

````diff
@@ -1,8 +1,7 @@
-transformers==4.56.2
-torch==2.7.0
-torchvision==0.22.0
-lm_eval==0.4.9.1
-datasets==3.6.0
-deepspeed==0.17.6
-auto-round>=0.8.0
-neural-compressor-pt>=3.6
+transformers==4.57.3
+torch==2.9.0
+torchvision==0.24.0
+lm_eval==0.4.9.2
+datasets==4.4.2
+auto-round>=0.9.3
+neural-compressor-pt>=3.7
````
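The pins above bump the whole stack in one move and drop `deepspeed` from the list entirely. A throwaway check (not part of the example) to confirm the environment resolved to the pinned versions after `pip install -r requirements.txt`:

```python
# Print installed versions of each pinned distribution; the output
# should match the requirements.txt pins above.
from importlib.metadata import version

for pkg in ("transformers", "torch", "torchvision", "lm_eval",
            "datasets", "auto-round", "neural-compressor-pt"):
    print(pkg, version(pkg))
```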

**run_quant.sh**

````diff
@@ -50,14 +50,13 @@ case "$TOPOLOGY" in
case "$DTYPE" in
"mxfp8")
echo "Running Llama 3.1 8B MXFP8 quantization..."
CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" $COMMON_ARGS --dtype MXFP8 --iters 1000 --nsamples 512 --export_path \"$OUTPUT_MODEL\""
CMD="python quantize.py --model_name_or_path \"$INPUT_MODEL\" $COMMON_ARGS --dtype MXFP8 --iters 0 --export_path \"$OUTPUT_MODEL\""
echo "Executing command: $CMD"
python quantize.py \
--model_name_or_path "$INPUT_MODEL" \
$COMMON_ARGS \
--dtype MXFP8 \
--iters 1000 \
--nsamples 512 \
--iters 0 \
--export_path "$OUTPUT_MODEL"
;;
"mxfp4")
````
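The script change mirrors the README: `--iters 0` switches `quantize.py` from AutoRound tuning to RTN, and `--nsamples 512` disappears because RTN uses no calibration samples. Numerically, RTN is just scale-and-round; a toy sketch on a symmetric integer grid (illustrative only — the real MXFP8 path quantizes block-wise to FP8 elements with shared scales):

```python
# Toy round-to-nearest (RTN) quantization: scale weights onto a small
# grid, round each one independently, and dequantize. No calibration
# data or tuning iterations are involved.
import torch

w = torch.tensor([0.37, -1.52, 0.08, 2.91])
scale = w.abs().max() / 7.0                     # map max |w| to the top level
q = torch.clamp(torch.round(w / scale), -8, 7)  # round-to-nearest, then clip
w_rtn = q * scale                               # dequantized approximation
print(w_rtn)  # tensor([ 0.4157, -1.6629,  0.0000,  2.9100])
```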