]

import inspect
- import torch
import os
- from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, TypeVar, Union
- from collections import OrderedDict
import re
+ from collections import Counter, OrderedDict
+ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, TypeVar, Union
+
+ import torch

# Skip some modules sensitive to quantization
SKIP_QUANTIZATION_MODULES = [
    "lm_head",
-     "multi_modal_projector", # Llama 3.2 Vision, Pixtral, Llava
-     "merger",                # Qwen2 VL
-     "modality_projection",   # Idefics, SmolVLM
-     "router",                # MoE Router
-     "gate",                  # MoE Router
+     "multi_modal_projector",  # Llama 3.2 Vision, Pixtral, Llava
+     "merger",  # Qwen2 VL
+     "modality_projection",  # Idefics, SmolVLM
+     "router",  # MoE Router
+     "gate",  # MoE Router
]

+
def get_peft_regex(
    model,
-     finetune_vision_layers: bool = True,
-     finetune_language_layers: bool = True,
-     finetune_attention_modules: bool = True,
-     finetune_mlp_modules: bool = True,
-     target_modules: list[str] = None,
-     vision_tags: list[str] = ["vision", "image", "visual", "patch",],
-     language_tags: list[str] = ["language", "text",],
-     attention_tags: list[str] = ["self_attn", "attention", "attn",],
-     mlp_tags: list[str] = ["mlp", "feed_forward", "ffn", "dense",],
+     *,
+     finetune_vision_layers: bool = True,
+     finetune_language_layers: bool = True,
+     finetune_attention_modules: bool = True,
+     finetune_mlp_modules: bool = True,
+     target_modules: List[str] | None = None,
+     vision_tags: List[str] = ("vision", "image", "visual", "patch"),
+     language_tags: List[str] = ("language", "text"),
+     attention_tags: List[str] = ("self_attn", "attention", "attn"),
+     mlp_tags: List[str] = ("mlp", "feed_forward", "ffn", "dense"),
) -> str:
    """
-     Create a regex pattern to apply LoRA to only select layers of a model.
+     Build a **safe** regular expression that matches ONLY the *leaf*
+     `torch.nn.Linear` layers we want to adapt with LoRA.
+
+     The previous implementation matched any module path that merely
+     *contained* one of the projection names; after fused-projection
+     rewrites this included helpers such as
+     `model.layers.3.mlp.gate_up_proj → ModuleDict`, which PEFT cannot
+     patch. We now anchor the name to the **last dot-separated field**
+     so only genuine linear layers match.
    """
58
- # All Unsloth Zoo code licensed under LGPLv3
59
- if not finetune_vision_layers and not finetune_language_layers :
69
+ # — sanity checks --------------------------------------------------
70
+ if not ( finetune_vision_layers or finetune_language_layers ) :
60
71
raise RuntimeError (
61
- "Unsloth: No layers to finetune - please select to finetune the vision and/or the language layers! "
72
+ "Select at least one of vision / language layers to finetune. "
62
73
)
63
- if not finetune_attention_modules and not finetune_mlp_modules :
74
+ if not ( finetune_attention_modules or finetune_mlp_modules ) :
64
75
raise RuntimeError (
65
- "Unsloth: No modules to finetune - please select to finetune the attention and/or the mlp modules! "
76
+ "Select at least one of attention / MLP modules to finetune. "
66
77
)
67
- pass
68
78
-     from collections import Counter
-     # Get only linear layers
-     modules = model.named_modules()
-     linear_modules = [name for name, module in modules if isinstance(module, torch.nn.Linear)]
-     all_linear_modules = Counter(x.rsplit(".")[-1] for x in linear_modules)
+     # -- collect all leaf-names of *linear* layers ----------------------
+     linear_modules = [
+         name for name, mod in model.named_modules() if isinstance(mod, torch.nn.Linear)
+     ]
+     leaf_names = [path.rsplit(".", 1)[-1] for path in linear_modules]
+     leaf_counts = Counter(leaf_names)

-     # Isolate lm_head / projection matrices if count == 1
    if target_modules is None:
-         only_linear_modules = []
-         projection_modules = {}
-         for j, (proj, count) in enumerate(all_linear_modules.items()):
-             if count != 1:
-                 only_linear_modules.append(proj)
-             else:
-                 projection_modules[proj] = j
-         pass
+         # keep names that appear in *more* than one place
+         # (single-occurrence heads are usually lm_head / projectors)
+         candidate_leafs = [n for n, c in leaf_counts.items() if c > 1]
    else:
-         assert(type(target_modules) is list)
-         only_linear_modules = list(target_modules)
-         pass
-
-     # Create regex matcher
-     regex_model_parts = []
-     if finetune_vision_layers: regex_model_parts += vision_tags
-     if finetune_language_layers: regex_model_parts += language_tags
-     regex_components = []
-     if finetune_attention_modules: regex_components += attention_tags
-     if finetune_mlp_modules: regex_components += mlp_tags
-
-     regex_model_parts = "|".join(regex_model_parts)
-     regex_components = "|".join(regex_components)
-
-     match_linear_modules = r"(?:" + "|".join(re.escape(x) for x in only_linear_modules) + r")"
-     regex_matcher = \
-         r".*?(?:" + regex_model_parts + \
-         r").*?(?:" + regex_components + \
-         r").*?" + match_linear_modules + ".*?"
-
-     # Also account for model.layers.0.self_attn/mlp type modules like Qwen
+         if not isinstance(target_modules, list):
+             raise TypeError("`target_modules` must be a list of strings.")
+         candidate_leafs = list(target_modules)
+
+     # -- assemble regex parts ------------------------------------------
+     def _join(xs):
+         return "|".join(map(re.escape, xs)) or "$^"  # empty → no match
+
+     # which *part* of the model path (vision/language)
+     model_part_pat = (
+         _join(vision_tags if finetune_vision_layers else [])
+         + "|"
+         + _join(language_tags if finetune_language_layers else "")
+     )
+     # which *sub-module* inside the block (attn/mlp)
+     component_pat = (
+         _join(attention_tags if finetune_attention_modules else [])
+         + "|"
+         + _join(mlp_tags if finetune_mlp_modules else "")
+     )
+
+     # exact leaf names -- anchor to "preceded by dot or start" AND "end of string"
+     leaf_pat = r"(?:(?<=\.)|^)(?:" + _join(candidate_leafs) + r")$"
+
+     # full matcher
+     regex_matcher = (
+         r".*?(?:" + model_part_pat + r")"  # vision / language part
+         r".*?(?:" + component_pat + r")"  # attn / mlp component
+         r".*?" + leaf_pat  # leaf linear layer
+     )
+
+     # also allow Qwen-style `model.layers.0.self_attn.q_proj` paths
    if finetune_language_layers:
-         regex_matcher = r"(?:" + regex_matcher + \
-             r")|(?:\bmodel\.layers\.[\d]{1,}\.(?:" + regex_components + \
-             r")\.(?:" + match_linear_modules + r"))"
-         pass
-
-     # Check if regex is wrong since model does not have vision parts
-     check = any(re.search(regex_matcher, name, flags = re.DOTALL) for name in linear_modules)
-     if not check:
-         regex_matcher = \
-             r".*?(?:" + regex_components + \
-             r").*?" + match_linear_modules + ".*?"
-     pass
-
-     # Final check to confirm if matches exist
-     check = any(re.search(regex_matcher, name, flags = re.DOTALL) for name in linear_modules)
-     if not check and target_modules is not None:
-         raise RuntimeError(
-             f"Unsloth: No layers to finetune? You most likely specified target_modules = {target_modules} incorrectly!"
+         regex_matcher = (
+             regex_matcher
+             + "|"
+             + r"(?:\bmodel\.layers\.\d+\.(?:"
+             + component_pat
+             + r")\."
+             + leaf_pat
+             + ")"
        )
-     elif not check:
+
+     # -- verify we actually hit something ------------------------------
+     if not any(re.search(regex_matcher, n) for n in linear_modules):
        raise RuntimeError(
-             f"Unsloth: No layers to finetune for {model.config._name_or_path}. Please file a bug report!"
+             f"Unsloth: the generated regex matched **no** linear layers "
+             f"in {model.__class__.__name__}. "
+             f"Check your *tags* / *target_modules* settings."
        )
-     pass
    return regex_matcher
+
+
pass

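# --- Editor's note (illustrative sketch, not part of this commit) -------------
# What the leaf-anchored pattern changes, using made-up module names; `re` is
# already imported at the top of this file.
demo_leaf_pat = r"(?:(?<=\.)|^)(?:gate_proj|up_proj|down_proj)$"
demo_matcher = r".*?(?:language).*?(?:mlp).*?" + demo_leaf_pat

# a genuine leaf Linear still matches ...
assert re.search(demo_matcher, "model.language_model.layers.3.mlp.up_proj")
# ... but a fused helper container no longer does (the old substring match did)
assert not re.search(demo_matcher, "model.language_model.layers.3.mlp.gate_up_proj")

# Typical downstream use (hedged: assumes PEFT, which treats a string
# target_modules as a regex pattern):
#     from peft import LoraConfig
#     config = LoraConfig(r=16, lora_alpha=16, target_modules=get_peft_regex(model))
# ------------------------------------------------------------------------------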
def get_lora_layer_modules():
    # All Unsloth Zoo code licensed under LGPLv3
    import peft.tuners.lora
+
    path = os.path.split(peft.tuners.lora.__file__)[0]
    files = os.listdir(path)

    Linear_LoRA_Layers = []
    for file in files:
-         if file == "__init__.py" or not file.endswith(".py"): continue
+         if file == "__init__.py" or not file.endswith(".py"):
+             continue
        item = f"peft.tuners.lora.{file[:-len('.py')]}"
        exec(f"import {item}", locals(), globals())
        modules = dir(eval(item))
        modules = [x for x in modules if x.startswith("Linear") or x.endswith("Linear")]
-         if len(modules) == 0: continue
+         if len(modules) == 0:
+             continue
        exec(f"from {item} import ({', '.join(modules)})", locals(), globals())
-         Linear_LoRA_Layers += [(eval(x), item, x,) for x in modules]
+         Linear_LoRA_Layers += [
+             (
+                 eval(x),
+                 item,
+                 x,
+             )
+             for x in modules
+         ]
    pass
    return tuple(Linear_LoRA_Layers)
+
+
pass

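# --- Editor's note (illustrative sketch, not part of this commit) -------------
# One hedged way the tuple returned above can be used: collect the LoRA Linear
# classes so a module can be tested for "already LoRA-patched". Assumes peft is
# installed; `some_module` and the variable names are examples only.
#
#     Linear_LoRA_Layers = get_lora_layer_modules()
#     lora_linear_classes = tuple(cls for cls, _module_path, _cls_name in Linear_LoRA_Layers)
#     is_lora_linear = isinstance(some_module, lora_linear_classes)
# ------------------------------------------------------------------------------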
@@ -164,16 +186,20 @@ def register_other_hooks(name1, name2, module, _hooks):
    other_hooks = []
    for value in old_hooks.values():
        qualname = getattr(value, "__qualname__", "")
-         name = getattr(value, "__name__", "")
-         if name1 in qualname or name2 in qualname: pass
-         elif name2 in name or name2 in name: pass
-         else: other_hooks.append(value)
+         name = getattr(value, "__name__", "")
+         if name1 in qualname or name2 in qualname:
+             pass
+         elif name1 in name or name2 in name:
+             pass
+         else:
+             other_hooks.append(value)
    pass
    # Keep none input requires grad hooks
    exec(f"module.{_hooks} = OrderedDict()")
    for hook in other_hooks:
        exec(f"module.register{_hooks[:-1]}(hook)")
    pass
+
pass

# Remove all previous forward hooks for gradient checkpointing
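# --- Editor's note (illustrative sketch, not part of this commit) -------------
# The same "keep every hook we did not install" idea for forward hooks, written
# without exec(). Like the code above it relies on torch.nn.Module's private
# `_forward_hooks` OrderedDict; the function and argument names are made up.
#
#     def keep_foreign_forward_hooks(module, ours=("requires_grad_post_hook",)):
#         foreign = [h for h in module._forward_hooks.values()
#                    if getattr(h, "__name__", "") not in ours]
#         module._forward_hooks.clear()
#         for h in foreign:
#             module.register_forward_hook(h)  # re-register in original order
# ------------------------------------------------------------------------------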
@@ -199,6 +225,7 @@ def requires_grad_post_hook(module, input, output):
            output.loss.requires_grad_(True)
        except Exception as _:
            raise RuntimeError("Unsloth: Failed to make output require gradients!")
+
pass

def requires_grad_pre_hook(module, input):
@@ -208,19 +235,22 @@ def requires_grad_pre_hook(module, input):
    elif type_input is tuple or type_input is list:
        if len(input) == 0:
            raise RuntimeError("Unsloth: Failed to make input require gradients!")
-             # print(f" WARNING: Empty list input to {module.__class__.__name__}!") #
+             # print(f" WARNING: Empty list input to {module.__class__.__name__}!") #
            # return
        if torch.is_floating_point(input[0]):
            input[0].requires_grad_(True)
        else:
            raise RuntimeError("Unsloth: Failed to make input require gradients!")
+
pass

# Find 1st ever item which requires grad
param = None
for name, param in model.named_parameters():
-     if param.requires_grad: break
- if param is None: return
+     if param.requires_grad:
+         break
+ if param is None:
+     return

name = re.sub("\.([\d]{1,})\.", r"[\1].", name)
name_components = name.split(".")
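# --- Editor's note (illustrative, not part of this commit): the substitution
# above rewrites numeric ModuleList indices so the dotted parameter path becomes
# an eval()-able attribute/index expression, e.g.:
#
#     >>> re.sub(r"\.([\d]{1,})\.", r"[\1].", "layers.0.self_attn.q_proj.weight")
#     'layers[0].self_attn.q_proj.weight'
# ------------------------------------------------------------------------------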
@@ -230,15 +260,18 @@ def requires_grad_pre_hook(module, input):

final_where = None
# Try getting previous parent module
- for j in range(len(name_components)-1, 0, -1):
+ for j in range(len(name_components) - 1, 0, -1):
    name_curr = name_components[j]
-     name_pre  = "model." + ".".join(name_components[:j])
+     name_pre = "model." + ".".join(name_components[:j])
    # Disable [\d] since it fails in gradient checkpointing
-     if re.search(r"\[[\d]{1,}\]", name_pre): continue
+     if re.search(r"\[[\d]{1,}\]", name_pre):
+         continue
    module = eval(name_pre)
    if hasattr(module, "forward"):
-         try: forward = inspect.getsource(module.forward)
-         except: continue
+         try:
+             forward = inspect.getsource(module.forward)
+         except:
+             continue

        # Normal self.language_model(...)
        if f"self.{name_curr}(" in forward:
@@ -250,7 +283,9 @@ def requires_grad_pre_hook(module, input):
            if f"in self.{module_list}:" in forward:
                final_where = j
                break
-             elif re.search(r"for [^\s]{3,} in self\." + module_list, forward) is not None:
+             elif (
+                 re.search(r"for [^\s]{3,} in self\." + module_list, forward) is not None
+             ):
                # Might have failed finding self.layers: like self.layers[...]:
                final_where = j
                break
@@ -274,11 +309,18 @@ def requires_grad_pre_hook(module, input):
module_name = "model." + ".".join(name_components[:final_where])
module = eval(module_name)

- if hasattr(module, "config") and (module.config.__class__.__name__ in ("CLIPVisionConfig", "SiglipVisionConfig",)):
+ if hasattr(module, "config") and (
+     module.config.__class__.__name__
+     in (
+         "CLIPVisionConfig",
+         "SiglipVisionConfig",
+     )
+ ):
    # CLIP - backtrack to get_input_embeddings since requires_grad fails!
    old_module = model
    for module_name, module in model.named_modules():
-         if not hasattr(module, "get_input_embeddings"): break
+         if not hasattr(module, "get_input_embeddings"):
+             break
        old_module = module
    module = old_module
    pass
@@ -314,6 +356,8 @@ def requires_grad_pre_hook(module, input):
        )
        module.register_forward_pre_hook(requires_grad_pre_hook)
    pass
+
+
pass

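# --- Editor's note (background on standard PyTorch behaviour, not part of this
# commit): the hooks above exist because reentrant gradient checkpointing cannot
# backpropagate into a segment whose inputs do not require grad (PyTorch warns
# "Gradients will be None"), so parameter gradients inside the segment are lost.
#
#     import torch
#     from torch.utils.checkpoint import checkpoint
#
#     layer = torch.nn.Linear(4, 4)
#     x = torch.randn(2, 4)                      # requires_grad is False
#     out = checkpoint(layer, x, use_reentrant=True)
#     print(out.requires_grad)                   # False -> no grads reach layer.weight
# ------------------------------------------------------------------------------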
# Unsloth Zoo - Utilities for Unsloth