Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions unsloth/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,6 @@ def from_pretrained(
fast_inference = False
break

# [TODO] For now load_in_fp8 only works with fast_inference, i.e. vLLM
if load_in_fp8 != False:
if not fast_inference:
raise NotImplementedError(
"Unsloth: set `fast_inference = True` when doing `load_in_fp8`."
)
# Check if 4bit is allowed specifically for AMD
if not ALLOW_BITSANDBYTES and not use_exact_model_name:
if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):
Expand Down