examples/autoround/README.md (1 addition, 1 deletion)
@@ -40,7 +40,7 @@ Load the model using `AutoModelForCausalLM` for handling quantized saving and lo
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
```
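
Every hunk in this diff applies the same one-line change: the `torch_dtype` keyword passed to `from_pretrained` becomes `dtype`. A minimal before/after sketch, assuming a transformers release recent enough to accept `dtype` (older releases only recognize `torch_dtype`):

```python
from transformers import AutoModelForCausalLM

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Before: `torch_dtype` selects the weight precision ("auto" keeps the
# dtype saved in the checkpoint); newer transformers releases treat this
# name as a deprecated alias.
# model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")

# After: the same behavior under the `dtype` keyword, as used throughout this PR.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
```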


examples/autoround/llama3_example.py (1 addition, 1 deletion)
@@ -7,7 +7,7 @@

# Select model and load it.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Select calibration dataset.

examples/awq/README.md (1 addition, 1 deletion)
@@ -18,7 +18,7 @@ recipe = [
To use your own model, start with an existing example change the `model_id` to match your own model stub.
```python
model_id = "path/to/your/model"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
```

## Adding Mappings ##

examples/awq/llama_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@
# Select model and load it.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Select calibration dataset.

examples/awq/qwen3-vl-30b-a3b-Instruct-example.py (1 addition, 1 deletion)
@@ -10,7 +10,7 @@

# Load model.
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
-MODEL_ID, torch_dtype=torch.bfloat16, device_map=None, trust_remote_code=True
+MODEL_ID, dtype=torch.bfloat16, device_map=None, trust_remote_code=True
)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)


examples/awq/qwen3_coder_moe_example.py (1 addition, 1 deletion)
@@ -51,7 +51,7 @@ def preprocess(example):


if __name__ == "__main__":
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

###

examples/awq/qwen3_moe_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@
# Select model and load it.
MODEL_ID = "Qwen/Qwen3-30B-A3B"

-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Select calibration dataset.

examples/big_models_with_sequential_onloading/README.md (1 addition, 1 deletion)
@@ -18,7 +18,7 @@ The Llama 3.3 70b is larger than 80 GB, surpassing the size of 1 A100. However,

```python
model_id = "meta-llama/Llama-3.3-70B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map=None)
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", device_map=None)
```

The model is first loaded onto the `cpu`, as indicated through the use of `None` for the `device_map` argument in the `from_pretrained` method when loading the model.

(file path not shown)
@@ -10,7 +10,7 @@
model_id = "meta-llama/Llama-3.3-70B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
model_id,
-torch_dtype="auto",
+dtype="auto",
device_map=None,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

examples/compressed_inference/fp8_compressed_inference.py (1 addition, 1 deletion)
@@ -21,7 +21,7 @@

compressed_model = AutoModelForCausalLM.from_pretrained(
MODEL_STUB,
-torch_dtype="auto",
+dtype="auto",
device_map="auto",
)


examples/multimodal_audio/README.md (1 addition, 1 deletion)
@@ -21,7 +21,7 @@ This directory contains example scripts for quantizing a variety of audio langua
To use your own multimodal modal, start with an existing example change the `model_id` to match your own model stub.
```python3
model_id = "path/to/your/model"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
```

## Customizing GPTQModifier Parameters ##

examples/multimodal_audio/whisper_example.py (1 addition, 1 deletion)
@@ -13,7 +13,7 @@
# Select model and load it.
MODEL_ID = "openai/whisper-large-v3"

-model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID, dtype="auto")
model.config.forced_decoder_ids = None
processor = WhisperProcessor.from_pretrained(MODEL_ID)


examples/multimodal_vision/README.md (1 addition, 1 deletion)
@@ -25,7 +25,7 @@ This directory contains example scripts for quantizing a variety of vision-langu
To use your own multimodal modal, start with an existing example change the `model_id` to match your own model stub.
```python3
model_id = "path/to/your/model"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
```

## Customizing GPTQModifier Parameters ##

examples/multimodal_vision/README_internvl3.md (1 addition, 1 deletion)
@@ -5,7 +5,7 @@ This file shows the example of quantizing InternVL3-8B-hf.

```python
model_id = "OpenGVLab/InternVL3-8B-hf"
-model = AutoModelForImageTextToText.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+model = AutoModelForImageTextToText.from_pretrained(model_id, dtype=torch.bfloat16)
processor = AutoProcessor.from_pretrained(model_id)
```


examples/multimodal_vision/gemma3_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@

# Load model.
model_id = "google/gemma-3-4b-it"
-model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = Gemma3ForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments

examples/multimodal_vision/idefics3_example.py (1 addition, 1 deletion)
@@ -10,7 +10,7 @@

# Load model.
model_id = "HuggingFaceM4/Idefics3-8B-Llama3" # or "HuggingFaceTB/SmolVLM-Instruct"
-model = Idefics3ForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = Idefics3ForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments

examples/multimodal_vision/internvl3_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@
# Load model.
model_id = "OpenGVLab/InternVL3-8B-hf"
model = AutoModelForImageTextToText.from_pretrained(
-model_id, torch_dtype=torch.bfloat16
+model_id, dtype=torch.bfloat16
)
processor = AutoProcessor.from_pretrained(model_id)


examples/multimodal_vision/llama4_example.py (1 addition, 1 deletion)
@@ -7,7 +7,7 @@

# Select model and load it.
model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-model = Llama4ForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = Llama4ForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = Llama4Processor.from_pretrained(model_id)
# MoE calibration is now handled automatically by the pipeline.
# The `SequentialLlama4TextMoe` modules (from `llmcompressor.modeling.llama4`)

examples/multimodal_vision/llava_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@

# Load model.
model_id = "llava-hf/llava-1.5-7b-hf"
-model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = LlavaForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments

examples/multimodal_vision/mistral3_example.py (1 addition, 1 deletion)
@@ -16,7 +16,7 @@

# Load model.
model_id = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
-model = Mistral3ForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = Mistral3ForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Use a custom calibration chat template, rather than the overly-verbose default

examples/multimodal_vision/mllama_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@

# Load model.
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-model = MllamaForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = MllamaForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments

examples/multimodal_vision/phi3_vision_example.py (1 addition, 1 deletion)
@@ -13,7 +13,7 @@
model_id = "microsoft/Phi-3-vision-128k-instruct"
model = AutoModelForCausalLM.from_pretrained(
model_id,
-torch_dtype="auto",
+dtype="auto",
trust_remote_code=True,
_attn_implementation="eager",
)

examples/multimodal_vision/pixtral_example.py (1 addition, 1 deletion)
@@ -13,7 +13,7 @@

# Load model.
model_id = "mgoin/pixtral-12b"
-model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = LlavaForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments

examples/multimodal_vision/qwen2_vl_example.py (1 addition, 1 deletion)
@@ -12,7 +12,7 @@

# Load model.
model_id = "Qwen/Qwen2-VL-2B-Instruct"
-model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments

examples/multimodal_vision/qwen_2_5_vl_example.py (1 addition, 1 deletion)
@@ -12,7 +12,7 @@

# Load model.
model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
-model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments

examples/quantization_2of4_sparse_w4a16/README.md (1 addition, 1 deletion)
@@ -62,7 +62,7 @@ from llmcompressor import oneshot, train

# load the model in as bfloat16 to save on memory and compute
model_stub = "neuralmagic/Llama-2-7b-ultrachat200k"
-model = AutoModelForCausalLM.from_pretrained(model_stub, torch_dtype=torch.bfloat16)
+model = AutoModelForCausalLM.from_pretrained(model_stub, dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_stub)

# uses LLM Compressor's built-in preprocessing for ultra chat

(file path not shown)
@@ -15,7 +15,7 @@

# load the model in as bfloat16 to save on memory and compute
model_stub = "neuralmagic/Llama-2-7b-ultrachat200k"
-model = AutoModelForCausalLM.from_pretrained(model_stub, torch_dtype=torch.bfloat16)
+model = AutoModelForCausalLM.from_pretrained(model_stub, dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_stub)

# uses LLM Compressor's built-in preprocessing for ultra chat

examples/quantization_kv_cache/README.md (1 addition, 1 deletion)
@@ -39,7 -39,7 @@ Load the model using `AutoModelForCausalLM`:
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
```


examples/quantization_kv_cache/gemma2_fp8_kv_example.py (1 addition, 1 deletion)
@@ -6,7 +6,7 @@

# Select model and load it.
MODEL_ID = "google/gemma-2-9b-it"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Select calibration dataset.

examples/quantization_kv_cache/llama3_fp8_kv_example.py (1 addition, 1 deletion)
@@ -7,7 -7,7 @@

# Select model and load it.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Select calibration dataset.

examples/quantization_kv_cache/phi3.5_fp8_kv_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@
# Phi-3.5 is a special case for KV cache quantization because it has
# fused QKV linear layers.
MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Select calibration dataset.

(file path not shown)
@@ -6,7 +6,7 @@
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Load model.
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Configure the quantization algorithm and scheme.

(file path not shown)
@@ -7,7 +7,7 @@
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Load model.
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)



(file path not shown)
@@ -22,7 +22,7 @@ def parse_args():

# Select model and load it.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Select calibration dataset.

(file path not shown)
@@ -7,7 +7,7 @@
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Load model.
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)



examples/quantization_w4a16/README.md (1 addition, 1 deletion)
@@ -40,7 +40,7 @@ Load the model using `AutoModelForCausalLM` for handling quantized saving and lo
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
```


examples/quantization_w4a16/llama3_example.py (1 addition, 1 deletion)
@@ -7,7 +7,7 @@

# Select model and load it.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Select calibration dataset.

examples/quantization_w4a16_fp4/llama3_example.py (1 addition, 1 deletion)
@@ -7,7 +7,7 @@
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

# Load model.
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Configure the quantization algorithm and scheme.

examples/quantization_w4a16_fp4/qwen3_example.py (1 addition, 1 deletion)
@@ -7,7 +7,7 @@
# Load model.
MODEL_ID = "Qwen/Qwen3-32B"
model = AutoModelForCausalLM.from_pretrained(
-MODEL_ID, torch_dtype="auto", trust_remote_code=True
+MODEL_ID, dtype="auto", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)


examples/quantization_w4a4_fp4/README.md (1 addition, 1 deletion)
@@ -39,7 +39,7 @@ Load the model using `AutoModelForCausalLM` for handling quantized saving and lo
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
```


examples/quantization_w4a4_fp4/llama3_example.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Load model.
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)



examples/quantization_w4a4_fp4/llama4_example.py (1 addition, 1 deletion)
@@ -7,7 +7,7 @@

# Select model and load it.
model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-model = Llama4ForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
+model = Llama4ForConditionalGeneration.from_pretrained(model_id, dtype="auto")
processor = Llama4Processor.from_pretrained(model_id)
# MoE calibration is now handled automatically by the pipeline.
# The `SequentialLlama4TextMoe` modules (from `llmcompressor.modeling.llama4`)

examples/quantization_w4a4_fp4/qwen3_next_example.py (1 addition, 1 deletion)
@@ -10,7 +10,7 @@
MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct"

# Load model.
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)



examples/quantization_w4a4_fp4/qwen3_vl_moe_w4a4_fp4.py (1 addition, 1 deletion)
@@ -10,7 +10,7 @@


# Load model.
-model = Qwen3VLMoeForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
+model = Qwen3VLMoeForConditionalGeneration.from_pretrained(MODEL_ID, dtype="auto")
processor = AutoProcessor.from_pretrained(MODEL_ID)

DATASET_ID = "neuralmagic/calibration"