Expand mutation operators with built‑in name swaps and regex fuzzing

Magic-Man-us · Magic-Man-us · commit 9fa1d440315f · 2025-06-01T12:32:00.000-04:00
- Extend operator_name to flip more built‑ins (len↔sum, min↔max, all↔any, sorted↔reversed, int↔float, etc.)
- Add operator_regex to mutate regex literals (quantifier swaps, {0,1}↔?, \d↔[0-9], \w↔[A-Za-z0-9_], char‐class reversal)
- Add operator_chr_ord and operator_enum_attribute, and register them together with operator_regex in mutation_operators
- Update tests to cover new regex and name‐swap behaviors
diff --git a/mutmut/node_mutation.py b/mutmut/node_mutation.py
@@ -1,4 +1,5 @@
-"""This module contains the mutations for indidvidual nodes, e.g. replacing a != b with a == b."""
+"""This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
+import re
 from typing import Any, Union
 from collections.abc import Callable, Iterable, Sequence
 import libcst as cst
@@ -107,11 +110,47 @@ def operator_keywords(
 
 
 def operator_name(node: cst.Name) -> Iterable[cst.CSTNode]:
+    """Swap well-known names (constants, builtins, enum bases) for a related counterpart."""
     name_mappings = {
         "True": "False",
         "False": "True",
         "deepcopy": "copy",
-        # TODO: probably need to add a lot of things here... some builtins maybe, what more?
+        "copy": "deepcopy",
+
+        # common aggregates
+        "len": "sum",
+        "sum": "len",
+        "min": "max",
+        "max": "min",
+
+        # boolean checks
+        "all": "any",
+        "any": "all",
+
+        # ordering
+        "sorted": "reversed",
+        "reversed": "sorted",
+
+        # numeric types
+        "int": "float",
+        "float": "int",
+
+        # byte types
+        "bytes": "bytearray",
+        "bytearray": "bytes",
+
+        # mapping/filtering
+        "map": "filter",
+        "filter": "map",
+
+        # enums
+        "Enum": "StrEnum",
+        "StrEnum": "Enum",
+        "IntEnum": "Enum",
+
+        # dict <-> set is a candidate swap, left disabled for now (handle with care)
+        # "dict": "set",
+        # "set": "dict",
     }
     if node.value in name_mappings:
         yield node.with_changes(value=name_mappings[node.value])
@@ -186,6 +225,161 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
         for i in range(len(node.cases)):
             yield node.with_changes(cases=[*node.cases[:i], *node.cases[i+1:]])
 
+
+def _is_valid_regex(pattern: str) -> bool:
+    """Return True when ``pattern`` compiles as a regular expression."""
+    try:
+        re.compile(pattern)
+    except (re.error, OverflowError, RecursionError):
+        return False
+    return True
+
+
+def _mutate_regex(inner: str) -> list[str]:
+    r"""Return mutated variants of the regex source text ``inner``.
+
+    Each rule is a plain text substitution applied to every occurrence at once:
+
+    - swap ``+`` <-> ``*`` and ``?`` <-> ``*``
+    - swap ``{0,1}`` <-> ``?``
+    - swap ``\d`` <-> ``[0-9]`` and ``\w`` <-> ``[A-Za-z0-9_]``
+    - reverse the contents of each ``[...]`` character class
+
+    Because the rules are textual they can leave the text unchanged
+    (reversing ``[a]``) or break the pattern (``[a-z]`` becomes ``[z-a]``,
+    ``(?:x)`` becomes ``(*:x)``).  Unchanged candidates are always dropped;
+    candidates that no longer compile are dropped whenever ``inner`` itself
+    compiles.  Duplicates are removed, keeping the first occurrence.
+    """
+    swaps = (
+        ("+", "*"),
+        ("*", "+"),
+        ("?", "*"),
+        ("*", "?"),
+        ("{0,1}", "?"),
+        ("?", "{0,1}"),
+        (r"\d", "[0-9]"),
+        ("[0-9]", r"\d"),
+        (r"\w", "[A-Za-z0-9_]"),
+        ("[A-Za-z0-9_]", r"\w"),
+    )
+    candidates = [inner.replace(old, new) for old, new in swaps if old in inner]
+    # NOTE(review): a class is a set, so reversing one that has no range, ``^``
+    # or escape yields an equivalent pattern; kept for backward compatibility.
+    for found in re.finditer(r"\[([^\]]+)\]", inner):
+        content = found.group(1)
+        candidates.append(inner.replace(f"[{content}]", f"[{content[::-1]}]"))
+
+    # Only vet candidates when the original text is itself a valid pattern;
+    # otherwise a compile failure says nothing about the mutation.
+    check_validity = _is_valid_regex(inner)
+    mutants: list[str] = []
+    for candidate in dict.fromkeys(candidates):  # dedupe, preserve order
+        if candidate == inner:
+            continue
+        if check_validity and not _is_valid_regex(candidate):
+            continue
+        mutants.append(candidate)
+    return mutants
+
+
+def operator_regex(node: cst.Call) -> Iterable[cst.CSTNode]:
+    """Yield one mutant per mutated pattern of a ``re`` call.
+
+    Handles calls spelled ``re.compile``, ``re.match``, ``re.search``,
+    ``re.fullmatch`` or ``re.findall`` whose first argument is a plain string
+    literal.  The literal's text is run through ``_mutate_regex`` and each
+    result is re-wrapped in the original prefix and quotes; any remaining
+    arguments are carried over untouched.
+    """
+    if not m.matches(
+        node,
+        m.Call(
+            func=m.Attribute(
+                value=m.Name("re"),
+                attr=m.MatchIfTrue(
+                    lambda t: t.value
+                    in ("compile", "match", "search", "fullmatch", "findall")
+                ),
+            ),
+            # ZeroOrMore admits trailing arguments (subject string, flags);
+            # a one-element list would only match single-argument calls.
+            args=[m.Arg(value=m.SimpleString()), m.ZeroOrMore()],
+        ),
+    ):
+        return
+
+    arg = node.args[0]
+    if arg.keyword is not None and arg.keyword.value != "pattern":
+        # e.g. re.search(string='a+', pattern=p): this literal is not the regex
+        return
+
+    lit: cst.SimpleString = arg.value  # type: ignore
+    raw = lit.value  # e.g. r'\d+\w*'
+    # Split into prefix (r, b, rb, u, ... in any case), quote and text so that
+    # bytes and triple-quoted literals are rebuilt as valid source.
+    prefix_len = len(raw) - len(raw.lstrip("rRbBuU"))
+    prefix, body = raw[:prefix_len], raw[prefix_len:]
+    quote = body[:3] if body[:3] in ('"""', "'''") else body[0]
+    inner = body[len(quote):-len(quote)]
+
+    for mutated_inner in _mutate_regex(inner):
+        new_lit = lit.with_changes(value=f"{prefix}{quote}{mutated_inner}{quote}")
+        yield node.with_changes(args=[arg.with_changes(value=new_lit), *node.args[1:]])
+
+
+def operator_chr_ord(node: cst.Call) -> Iterable[cst.CSTNode]:
+    """Adjust chr/ord calls slightly instead of swapping names.
+
+    ``chr(x, ...)`` yields ``chr(x + 1, ...)`` and ``ord(...)`` yields
+    ``ord(...) + 1``.  Only calls through a bare name with at least one
+    argument are considered.
+    """
+    if not (isinstance(node.func, cst.Name) and node.args):
+        return
+
+    name = node.func.value
+    if name == "chr":
+        first_arg = node.args[0]
+        if first_arg.star:
+            # chr(*xs) would become chr(*xs + 1), which unpacks ``xs + 1``
+            # instead of bumping the code point.
+            return
+        # NOTE(review): no parentheses are added, so an operand that binds
+        # looser than ``+`` (e.g. ``a << b``) regroups around the ``+ 1``.
+        bumped = cst.BinaryOperation(
+            left=first_arg.value,
+            operator=cst.Add(),
+            right=cst.Integer("1"),
+        )
+        yield node.with_changes(args=[first_arg.with_changes(value=bumped), *node.args[1:]])
+    elif name == "ord":
+        # NOTE(review): the call is replaced by an unparenthesized ``+``
+        # expression; under a tighter-binding parent (``ord(c) * 2``) the
+        # rendered source regroups.
+        yield cst.BinaryOperation(left=node, operator=cst.Add(), right=cst.Integer("1"))
+
+
+def operator_enum_attribute(node: cst.Attribute) -> Iterable[cst.CSTNode]:
+    """Swap common Enum base classes accessed as ``enum.<Base>``.
+
+    ``enum.Enum`` yields ``enum.StrEnum`` and ``enum.IntEnum``; ``enum.StrEnum``
+    and ``enum.IntEnum`` each yield ``enum.Enum``.
+    """
+    if not m.matches(node.value, m.Name("enum")):
+        return
+
+    attr = node.attr
+    if not isinstance(attr, cst.Name):
+        return
+
+    if attr.value == "Enum":
+        yield node.with_changes(attr=cst.Name("StrEnum"))
+        yield node.with_changes(attr=cst.Name("IntEnum"))
+    elif attr.value in {"StrEnum", "IntEnum"}:
+        yield node.with_changes(attr=cst.Name("Enum"))
+
+
 # Operators that should be called on specific node types
 mutation_operators: OPERATORS_TYPE = [
     (cst.BaseNumber, operator_number),
@@ -197,6 +353,9 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
     (cst.UnaryOperation, operator_remove_unary_ops),
     (cst.Call, operator_dict_arguments),
     (cst.Call, operator_arg_removal),
+    (cst.Call, operator_chr_ord),
+    (cst.Call, operator_regex),
+    (cst.Attribute, operator_enum_attribute),
     (cst.Lambda, operator_lambda),
     (cst.CSTNode, operator_keywords),
     (cst.CSTNode, operator_swap_op),
@@ -212,5 +372,3 @@ def _simple_mutation_mapping(
     if mutated_node_type:
         yield mutated_node_type()
 
-
-# TODO: detect regexes and mutate them in nasty ways? Maybe mutate all strings as if they are regexes
diff --git a/tests/test_mutation.py b/tests/test_mutation.py
@@ -9,7 +9,6 @@
     get_diff_for_mutant,
     orig_function_and_class_names_from_key,
     run_forced_fail_test,
-    Config,
     MutmutProgrammaticFailException,
     CatchOutput,
 )
@@ -153,7 +152,12 @@ def mutated_module(source: str) -> str:
         ('def foo(a, *args, **kwargs): pass', []),
         ('import foo', []),
         ('isinstance(a, b)', []),
-        ('len(a)', []),
+        ('len(a)', ['sum(a)', 'len(None)']),
+        ('sum(a)', ['len(a)', 'sum(None)']),
+        ('chr(65)', ['chr(65 + 1)', 'chr(None)']),
+        ("ord('a')", ["ord('a') + 1", 'ord(None)']),
+        ('enum.Enum', ['enum.StrEnum', 'enum.IntEnum']),
+        ('enum.StrEnum', ['enum.Enum']),
         ('deepcopy(obj)', ['copy(obj)', 'deepcopy(None)']),
     ]
 )
@@ -667,3 +671,44 @@ def add(self, *args, **kwargs):
     xǁAdderǁadd__mutmut_orig.__name__ = 'xǁAdderǁadd'
 
 print(Adder(1).add(2))"""
+
+@pytest.mark.parametrize("original, want_patterns", [
+    (
+        "re.compile(r'\\d+')",
+        [
+            "re.compile(r'\\d*')",              # + → *
+            "re.compile(r'[0-9]+')",            # \d → [0-9]
+        ],
+    ),
+    (
+        "re.search(r'[abc]+')",
+        [
+            "re.search(r'[abc]*')",             # + → *
+            "re.search(r'[cba]+')",             # [abc] → [cba]
+        ],
+    ),
+    (
+        "re.match(r'\\w{1,}')",
+        [
+            "re.match(r'[A-Za-z0-9_]{1,}')"     # \w → [A-Za-z0-9_]
+        ],
+    ),
+    (
+        "re.match(r'foo?')",
+        [
+            "re.match(r'foo*')",            # ? → *
+            "re.match(r'foo{0,1}')",       # ? → {0,1}
+        ],
+    ),
+    (
+        "re.search(r'bar{0,1}')",
+        [
+            "re.search(r'bar?')",          # {0,1} → ?
+        ],
+    ),
+])
+def test_regex_mutations_loose(original, want_patterns):
+    """Each expected regex mutant must appear among the generated mutants; extra mutants are allowed."""
+    mutants = mutants_for_source(original)
+    for want in want_patterns:
+        assert want in mutants, f"expected {want!r} in {mutants}"