unslothai · danielhanchen · Feb 25, 2026 · Jan 19, 2026 · Jan 19, 2026 · Feb 16, 2026
@@ -248,12 +248,6 @@ def from_pretrained(
                         fast_inference = False
                         break
 
-        # [TODO] For now fast_inference only works with fast_inference ie vLLM
-        if load_in_fp8 != False:
-            if not fast_inference:
-                raise NotImplementedError(
-                    "Unsloth: set `fast_inference = True` when doing `load_in_fp8`."
-                )
         # Check if 4bit is allowed specifically for AMD
         if not ALLOW_BITSANDBYTES and not use_exact_model_name:
             if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):