Skip to content

Commit a8b511b

Browse files
committed
Expand mutation operators with built‑in name swaps and regex fuzzing
- Extend operator_name to flip more built‑ins (len↔sum, min↔max, all↔any, str↔repr, etc.) - Add operator_regex to mutate regex literals (quantifier swaps, {1,}↔+, \d↔[0-9], char‐class reversal) - Register operator_regex alongside other call mutations - Update tests to cover new regex and name‐swap behaviors
1 parent 51fd290 commit a8b511b

File tree

2 files changed

+167
-6
lines changed

2 files changed

+167
-6
lines changed

mutmut/node_mutation.py

Lines changed: 130 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
"""This module contains the mutations for indidvidual nodes, e.g. replacing a != b with a == b."""
1+
"""This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
2+
import re
23
from typing import Any, Union
34
from collections.abc import Callable, Iterable, Sequence
45
import libcst as cst
@@ -107,11 +108,57 @@ def operator_keywords(
107108

108109

109110
def operator_name(node: cst.Name) -> Iterable[cst.CSTNode]:
110-
name_mappings = {
111+
name_mappings = {
111112
"True": "False",
112113
"False": "True",
113114
"deepcopy": "copy",
114-
# TODO: probably need to add a lot of things here... some builtins maybe, what more?
115+
"copy": "deepcopy",
116+
117+
# common aggregates
118+
"len": "sum",
119+
"sum": "len",
120+
"min": "max",
121+
"max": "min",
122+
123+
# boolean checks
124+
"all": "any",
125+
"any": "all",
126+
127+
# ordering
128+
"sorted": "reversed",
129+
"reversed": "sorted",
130+
131+
# repr vs. str
132+
"str": "repr",
133+
"repr": "str",
134+
135+
# numeric types
136+
"int": "float",
137+
"float": "int",
138+
139+
# sequences vs. tuples
140+
"list": "tuple",
141+
"tuple": "list",
142+
143+
# set types
144+
"set": "frozenset",
145+
"frozenset": "set",
146+
147+
# byte types
148+
"bytes": "bytearray",
149+
"bytearray": "bytes",
150+
151+
# (optionally) mapping/filtering
152+
"map": "filter",
153+
"filter": "map",
154+
155+
# character/ordinal conversions
156+
"chr": "ord",
157+
"ord": "chr",
158+
159+
# dict ↔ set might be fun… however, beware lol
160+
# "dict": "set",
161+
# "set": "dict",
115162
}
116163
if node.value in name_mappings:
117164
yield node.with_changes(value=name_mappings[node.value])
@@ -186,6 +233,85 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
186233
for i in range(len(node.cases)):
187234
yield node.with_changes(cases=[*node.cases[:i], *node.cases[i+1:]])
188235

236+
def _mutate_regex(inner: str) -> list[str]:
237+
"""
238+
Generate ‘nasty’ variants of a regex body:
239+
- swap + ↔ *
240+
- turn `{1,}` ↔ +
241+
- turn `\d` ↔ `[0-9]` and `\w` ↔ `[A-Za-z0-9_]`
242+
- reverse the contents of any simple [...] class
243+
"""
244+
muts: list[str] = []
245+
# + <-> *
246+
if "+" in inner:
247+
muts.append(inner.replace("+", "*"))
248+
if "*" in inner:
249+
muts.append(inner.replace("*", "+"))
250+
# {1,} -> + and + -> {1,}
251+
if re.search(r"\{1,\}", inner):
252+
muts.append(re.sub(r"\{1,\}", "+", inner))
253+
if "+" in inner:
254+
muts.append(re.sub(r"\+", "{1,}", inner))
255+
# digit class ↔ shorthand
256+
if "\\d" in inner:
257+
muts.append(inner.replace("\\d", "[0-9]"))
258+
if "[0-9]" in inner:
259+
muts.append(inner.replace("[0-9]", "\\d"))
260+
# word class ↔ shorthand
261+
if "\\w" in inner:
262+
muts.append(inner.replace("\\w", "[A-Za-z0-9_]"))
263+
if "[A-Za-z0-9_]" in inner:
264+
muts.append(inner.replace("[A-Za-z0-9_]", "\\w"))
265+
# reverse simple character classes
266+
for mobj in re.finditer(r"\[([^\]]+)\]", inner):
267+
content = mobj.group(1)
268+
rev = content[::-1]
269+
orig = f"[{content}]"
270+
mutated = f"[{rev}]"
271+
muts.append(inner.replace(orig, mutated))
272+
# dedupe, preserve order
273+
return list(dict.fromkeys(muts))
274+
275+
276+
def operator_regex(node: cst.Call) -> Iterable[cst.CSTNode]:
277+
"""
278+
Look for calls like re.compile(r'…'), re.match, re.search, etc.,
279+
extract the first SimpleString arg, apply _mutate_regex, and yield
280+
one mutant per new pattern.
281+
"""
282+
if not m.matches(
283+
node,
284+
m.Call(
285+
func=m.Attribute(
286+
value=m.Name("re"),
287+
attr=m.MatchIfTrue(
288+
lambda t: t.value
289+
in ("compile", "match", "search", "fullmatch", "findall")
290+
),
291+
),
292+
args=[m.Arg(value=m.SimpleString())],
293+
),
294+
):
295+
return
296+
297+
arg = node.args[0]
298+
lit: cst.SimpleString = arg.value # type: ignore
299+
raw = lit.value # e.g. r'\d+\w*'
300+
# strip off leading r/R
301+
prefix = ""
302+
body = raw
303+
if raw[:2].lower() == "r'" or raw[:2].lower() == 'r"':
304+
prefix, body = raw[0], raw[1:]
305+
quote = body[0]
306+
inner = body[1:-1]
307+
308+
for mutated_inner in _mutate_regex(inner):
309+
new_raw = f"{prefix}{quote}{mutated_inner}{quote}"
310+
new_lit = lit.with_changes(value=new_raw)
311+
new_arg = arg.with_changes(value=new_lit)
312+
yield node.with_changes(args=[new_arg, *node.args[1:]])
313+
314+
189315
# Operators that should be called on specific node types
190316
mutation_operators: OPERATORS_TYPE = [
191317
(cst.BaseNumber, operator_number),
@@ -197,6 +323,7 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
197323
(cst.UnaryOperation, operator_remove_unary_ops),
198324
(cst.Call, operator_dict_arguments),
199325
(cst.Call, operator_arg_removal),
326+
(cst.Call, operator_regex),
200327
(cst.Lambda, operator_lambda),
201328
(cst.CSTNode, operator_keywords),
202329
(cst.CSTNode, operator_swap_op),
@@ -212,5 +339,3 @@ def _simple_mutation_mapping(
212339
if mutated_node_type:
213340
yield mutated_node_type()
214341

215-
216-
# TODO: detect regexes and mutate them in nasty ways? Maybe mutate all strings as if they are regexes

tests/test_mutation.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
get_diff_for_mutant,
1010
orig_function_and_class_names_from_key,
1111
run_forced_fail_test,
12-
Config,
1312
MutmutProgrammaticFailException,
1413
CatchOutput,
1514
)
@@ -667,3 +666,40 @@ def add(self, *args, **kwargs):
667666
xǁAdderǁadd__mutmut_orig.__name__ = 'xǁAdderǁadd'
668667
669668
print(Adder(1).add(2))"""
669+
670+
@pytest.mark.parametrize("original, want_patterns", [
671+
(
672+
"re.compile(r'\\d+')",
673+
[
674+
"re.compile(r'\\d*')", # + → *
675+
"re.compile(r'\\d{1,}')", # + → {1,}
676+
"re.compile(r'[0-9]+')", # \d → [0-9]
677+
],
678+
),
679+
(
680+
"re.search(r'[abc]+')",
681+
[
682+
"re.search(r'[abc]*')", # + → *
683+
"re.search(r'[cba]+')", # [abc] → [cba]
684+
],
685+
),
686+
(
687+
"re.match(r'\\w{1,}')",
688+
[
689+
"re.match(r'\\w+')", # {1,} → +
690+
"re.match(r'[A-Za-z0-9_]{1,}')" # \w → [A-Za-z0-9_]
691+
],
692+
),
693+
])
694+
def test_regex_mutations_loose(original, want_patterns):
695+
mutants = mutants_for_source(original)
696+
# remove the generic "foo(None)" from operator_arg_removal
697+
# and the SimpleString "XX…" from operator_string
698+
filtered = [
699+
m for m in mutants
700+
if "(None)" not in m
701+
and "XX" not in m
702+
]
703+
704+
for want in want_patterns:
705+
assert want in filtered, f"expected {want!r} in {filtered}"

0 commit comments

Comments
 (0)