Skip to content

Commit 07dbd86

Browse files
Datta0, pre-commit-ci[bot], danielhanchen
authored
Misc fixes (#4018)
* convert print to logger * Print but cleaner * Hide model on multiple devices * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix typo transfomers -> transformers, revert MoE message change * Update MoE detection message to show num_experts and target_modules * Fix llama-cli path in save info message * target_parameters warning for moe * fix should_convert_module for llm_int8_skip_modules * fix should_convert_module for llm_int8_skip_modules * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Logging filters * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * negation * remove should_convert_module patch * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Daniel Hanchen <danielhanchen@users.noreply.github.com>
1 parent 6e81570 commit 07dbd86

File tree

3 files changed

+63
-18
lines changed

3 files changed

+63
-18
lines changed

unsloth/models/_utils.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
DEVICE_COUNT,
104104
ALLOW_PREQUANTIZED_MODELS,
105105
)
106+
from ..import_fixes import UNSLOTH_ENABLE_LOGGING
106107
from unsloth_zoo.log import logger
107108
from unsloth_zoo.tokenizer_utils import (
108109
patch_tokenizer as _patch_tokenizer,
@@ -255,8 +256,45 @@ def filter(self, x):
255256
return not (self.text in x.getMessage())
256257

257258

259+
# Replace warning messages (analogous to HideLoggingMessage but for warnings.warn)
260+
class ReplaceWarningMessage:
261+
"""
262+
Intercepts warnings.warn calls and replaces matching messages with Unsloth branded ones.
263+
Uses a list of registered (match_text, replacement, category) rules checked in order.
264+
"""
265+
266+
_rules = []
267+
_original_showwarning = None
268+
_installed = False
269+
270+
@classmethod
271+
def add_rule(cls, match_text, replacement, category = None):
272+
cls._rules.append((match_text, replacement, category))
273+
if not cls._installed:
274+
cls._install()
275+
276+
@classmethod
277+
def _install(cls):
278+
cls._original_showwarning = warnings.showwarning
279+
cls._installed = True
280+
281+
def _patched_showwarning(
282+
message, category, filename, lineno, file = None, line = None
283+
):
284+
msg_str = str(message)
285+
for match_text, replacement, match_category in cls._rules:
286+
if match_text in msg_str and (
287+
match_category is None or category is match_category
288+
):
289+
print(replacement)
290+
return
291+
cls._original_showwarning(message, category, filename, lineno, file, line)
292+
293+
warnings.showwarning = _patched_showwarning
294+
295+
258296
# Stop vLLM messages
259-
if os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") != "1":
297+
if not UNSLOTH_ENABLE_LOGGING:
260298
try:
261299
from vllm.worker.worker import logger as vllm_worker_logger
262300

@@ -539,6 +577,27 @@ def remove(self):
539577
except:
540578
pass
541579

580+
# Hide HF Hub unauthenticated request warnings
581+
try:
582+
from huggingface_hub.utils._http import logger as hf_http_logger
583+
584+
hf_http_logger.addFilter(
585+
HideLoggingMessage("You are sending unauthenticated requests")
586+
)
587+
del hf_http_logger
588+
except:
589+
pass
590+
591+
# Replace PEFT target_parameters warning with Unsloth branded message for MoE models
592+
ReplaceWarningMessage.add_rule(
593+
match_text = "target_parameters",
594+
replacement = (
595+
"Unsloth: PEFT set target_parameters but found no matching parameters.\n"
596+
"This is expected for MoE models - Unsloth handles MoE expert LoRA targeting separately."
597+
),
598+
category = RuntimeWarning,
599+
)
600+
542601
# Patch get_model_param_count to record correct 4bit / 8bit
543602
from transformers.trainer_pt_utils import is_deepspeed_zero3_enabled
544603

@@ -939,7 +998,7 @@ def _is_openai_available():
939998
xformers_attention = None
940999
xformers_version = None
9411000
except Exception as e:
942-
if os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") != "0":
1001+
if UNSLOTH_ENABLE_LOGGING:
9431002
print(
9441003
"========\nSwitching to PyTorch attention since your Xformers is broken.\n========\n"
9451004
)

unsloth/models/llama.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3086,14 +3086,7 @@ def get_peft_model(
30863086
gc.collect()
30873087
clean_gpu_cache()
30883088

3089-
import warnings as _w
3090-
3091-
with _w.catch_warnings():
3092-
_w.filterwarnings(
3093-
"ignore",
3094-
message = ".*target_parameters.*were set but no parameter was matched.*",
3095-
)
3096-
model = _get_peft_model(model, lora_config)
3089+
model = _get_peft_model(model, lora_config)
30973090
# Fix LoraConfig.auto_mapping is None
30983091
fix_lora_auto_mapping(model)
30993092

unsloth/models/vision.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,14 +1224,7 @@ def get_peft_model(
12241224
model,
12251225
use_gradient_checkpointing = use_gradient_checkpointing,
12261226
)
1227-
import warnings as _w
1228-
1229-
with _w.catch_warnings():
1230-
_w.filterwarnings(
1231-
"ignore",
1232-
message = ".*target_parameters.*were set but no parameter was matched.*",
1233-
)
1234-
model = _get_peft_model(model, lora_config)
1227+
model = _get_peft_model(model, lora_config)
12351228
# Apply QAT + LoRA if specified
12361229
if qat_scheme is not None:
12371230
print("Unsloth: Applying QAT to mitigate quantization degradation")

0 commit comments

Comments (0)