Skip to content

Commit 9fa1d44

Browse files
committed
Expand mutation operators with built‑in name swaps and regex fuzzing
- Extend operator_name to flip more built-ins (len↔sum, min↔max, all↔any, sorted↔reversed, int↔float, etc.)
- Add operator_regex to mutate regex literals (quantifier swaps, {0,1}↔?, \d↔[0-9], \w↔[A-Za-z0-9_], char-class reversal)
- Register operator_regex alongside other call mutations
- Update tests to cover new regex and name-swap behaviors
1 parent 51fd290 commit 9fa1d44

File tree

2 files changed

+209
-7
lines changed

2 files changed

+209
-7
lines changed

mutmut/node_mutation.py

Lines changed: 163 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
"""This module contains the mutations for indidvidual nodes, e.g. replacing a != b with a == b."""
1+
"""This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
2+
import re
3+
"""This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
4+
import re
25
from typing import Any, Union
36
from collections.abc import Callable, Iterable, Sequence
47
import libcst as cst
@@ -107,11 +110,47 @@ def operator_keywords(
107110

108111

109112
def operator_name(node: cst.Name) -> Iterable[cst.CSTNode]:
    """Swap a well-known name for a closely related one.

    Yields a single copy of ``node`` whose ``value`` is replaced when the
    name appears in the mapping below, and nothing otherwise.  The lookup is
    purely textual: the name is not resolved, so a local variable that
    happens to be called ``len`` is mutated just like the builtin.
    """
    # A single dict literal: opening it twice in a row (as the diff did) is a
    # syntax error.
    name_mappings: dict[str, str] = {
        "True": "False",
        "False": "True",
        "deepcopy": "copy",
        "copy": "deepcopy",

        # common aggregates
        "len": "sum",
        "sum": "len",
        "min": "max",
        "max": "min",

        # boolean checks
        "all": "any",
        "any": "all",

        # ordering
        "sorted": "reversed",
        "reversed": "sorted",

        # numeric types
        "int": "float",
        "float": "int",

        # byte types
        "bytes": "bytearray",
        "bytearray": "bytes",

        # mapping/filtering
        "map": "filter",
        "filter": "map",

        # enums (IntEnum only maps one way: Enum already maps to StrEnum)
        "Enum": "StrEnum",
        "StrEnum": "Enum",
        "IntEnum": "Enum",

        # NOTE: a dict <-> set swap was considered but is left disabled.
    }
    if node.value in name_mappings:
        yield node.with_changes(value=name_mappings[node.value])
@@ -186,6 +225,123 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
186225
for i in range(len(node.cases)):
187226
yield node.with_changes(cases=[*node.cases[:i], *node.cases[i+1:]])
188227

228+
def _mutate_regex(inner: str) -> list[str]:
    r"""Return 'nasty' variants of the regex source text ``inner``.

    ``inner`` is the pattern as written between the quotes of the string
    literal.  Candidates are generated, in this order, by:

    - swapping quantifiers: ``+`` <-> ``*`` and ``?`` <-> ``*``
    - rewriting ``{0,1}`` <-> ``?``
    - rewriting ``\d`` <-> ``[0-9]`` and ``\w`` <-> ``[A-Za-z0-9_]``
    - reversing the contents of each ``[...]`` character class

    Every rewrite is a plain text substitution applied to all occurrences at
    once; escapes and character-class context are not parsed.  Because of
    that, some candidates are useless and are filtered out before returning:

    - candidates identical to ``inner`` (reversing ``[a]`` changes nothing);
    - candidates that no longer compile although ``inner`` does
      (``[a-z]`` -> ``[z-a]``, ``(?:x)`` -> ``(*:x)``, ``a+?`` -> ``a+{0,1}``).

    Returns the remaining candidates, de-duplicated, in generation order.
    """

    def compiles(pattern: str) -> bool:
        # Imported locally so the helper relies on nothing beyond the
        # module-level ``re`` import.
        import warnings

        try:
            with warnings.catch_warnings():
                # Only hard errors are of interest; warnings emitted while
                # compiling (e.g. FutureWarning for a possible nested set)
                # must not escape, even when warnings are turned into errors.
                warnings.simplefilter("ignore")
                re.compile(pattern)
        except (re.error, OverflowError, RecursionError):
            return False
        return True

    # (old, new) text swaps, listed in the order their mutants are emitted.
    literal_swaps = (
        ("+", "*"),
        ("*", "+"),
        ("?", "*"),
        ("*", "?"),
        ("{0,1}", "?"),
        ("?", "{0,1}"),
        ("\\d", "[0-9]"),
        ("[0-9]", "\\d"),
        ("\\w", "[A-Za-z0-9_]"),
        ("[A-Za-z0-9_]", "\\w"),
    )
    candidates = [
        inner.replace(old, new) for old, new in literal_swaps if old in inner
    ]

    # Reverse the body of each bracketed class (all identical classes at once).
    for char_class in re.finditer(r"\[([^\]]+)\]", inner):
        content = char_class.group(1)
        candidates.append(inner.replace(f"[{content}]", f"[{content[::-1]}]"))

    # De-duplicate while preserving order and drop no-op "mutants".
    unique = [c for c in dict.fromkeys(candidates) if c != inner]

    # Only filter on validity when the input itself is a valid pattern;
    # otherwise there is no baseline to compare against.
    if compiles(inner):
        unique = [c for c in unique if compiles(c)]
    return unique
271+
272+
273+
def operator_regex(node: cst.Call) -> Iterable[cst.CSTNode]:
    """Mutate the pattern literal of ``re.compile``/``re.match``/... calls.

    A call qualifies when it is spelled ``re.<func>(...)`` with ``<func>``
    one of ``compile``, ``match``, ``search``, ``fullmatch`` or ``findall``
    and its first argument is a single string literal.  Any further arguments
    (subject string, flags, ...) are allowed and copied through unchanged.

    Yields one copy of ``node`` per pattern returned by ``_mutate_regex``,
    with only the first argument's literal replaced.
    """
    regex_functions = ("compile", "match", "search", "fullmatch", "findall")
    is_regex_call = m.matches(
        node,
        m.Call(
            func=m.Attribute(
                value=m.Name("re"),
                attr=m.MatchIfTrue(lambda t: t.value in regex_functions),
            ),
            # A bare one-element list would only match calls with exactly one
            # argument; ZeroOrMore() also admits ``re.match(pattern, text)``
            # and ``re.compile(pattern, flags)``.
            args=[m.Arg(value=m.SimpleString()), m.ZeroOrMore()],
        ),
    )
    if not is_regex_call:
        return

    pattern_arg = node.args[0]
    literal: cst.SimpleString = pattern_arg.value  # type: ignore

    # Let libcst split the literal instead of slicing by hand, so b/rb/u
    # prefixes and triple quotes survive.  ``prefix`` is reported
    # lower-cased, hence only its length is used and the original spelling
    # (e.g. ``R``) is copied from the source text.
    prefix = literal.value[: len(literal.prefix)]
    quote = literal.quote

    for mutated_inner in _mutate_regex(literal.raw_value):
        new_literal = literal.with_changes(
            value=f"{prefix}{quote}{mutated_inner}{quote}"
        )
        new_arg = pattern_arg.with_changes(value=new_literal)
        yield node.with_changes(args=[new_arg, *node.args[1:]])
310+
311+
312+
def operator_chr_ord(node: cst.Call) -> Iterable[cst.CSTNode]:
    """Nudge ``chr``/``ord`` calls by one rather than swapping their names.

    ``chr(x)`` becomes ``chr(x + 1)`` (the first argument is wrapped in an
    addition) and ``ord(x)`` becomes ``ord(x) + 1`` (the whole call is
    wrapped).  Calls with no arguments, or whose callee is not a bare name,
    yield nothing.
    """
    callee = node.func
    if not isinstance(callee, cst.Name) or not node.args:
        return

    head, *tail = node.args
    if callee.value == "chr":
        shifted = cst.BinaryOperation(
            left=head.value,
            operator=cst.Add(),
            right=cst.Integer("1"),
        )
        yield node.with_changes(args=[head.with_changes(value=shifted), *tail])
    elif callee.value == "ord":
        # Here the result of the call is shifted, not its argument.
        yield cst.BinaryOperation(
            left=node,
            operator=cst.Add(),
            right=cst.Integer("1"),
        )
327+
328+
329+
def operator_enum_attribute(node: cst.Attribute) -> Iterable[cst.CSTNode]:
    """Swap the Enum base class in ``enum.<Base>`` attribute accesses.

    ``enum.Enum`` yields ``enum.StrEnum`` and ``enum.IntEnum`` (in that
    order); ``enum.StrEnum`` and ``enum.IntEnum`` each yield ``enum.Enum``.
    Anything not accessed through the bare name ``enum`` yields nothing.
    """
    # Replacement attribute names, in the order the mutants are emitted.
    swaps = {
        "Enum": ("StrEnum", "IntEnum"),
        "StrEnum": ("Enum",),
        "IntEnum": ("Enum",),
    }

    owner = node.value
    if not (isinstance(owner, cst.Name) and owner.value == "enum"):
        return

    member = node.attr
    if not isinstance(member, cst.Name):
        return

    for replacement in swaps.get(member.value, ()):
        yield node.with_changes(attr=cst.Name(replacement))
343+
344+
189345
# Operators that should be called on specific node types
190346
mutation_operators: OPERATORS_TYPE = [
191347
(cst.BaseNumber, operator_number),
@@ -197,6 +353,10 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
197353
(cst.UnaryOperation, operator_remove_unary_ops),
198354
(cst.Call, operator_dict_arguments),
199355
(cst.Call, operator_arg_removal),
356+
(cst.Call, operator_chr_ord),
357+
(cst.Call, operator_regex),
358+
(cst.Call, operator_chr_ord),
359+
(cst.Attribute, operator_enum_attribute),
200360
(cst.Lambda, operator_lambda),
201361
(cst.CSTNode, operator_keywords),
202362
(cst.CSTNode, operator_swap_op),
@@ -212,5 +372,3 @@ def _simple_mutation_mapping(
212372
if mutated_node_type:
213373
yield mutated_node_type()
214374

215-
216-
# TODO: detect regexes and mutate them in nasty ways? Maybe mutate all strings as if they are regexes

tests/test_mutation.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
get_diff_for_mutant,
1010
orig_function_and_class_names_from_key,
1111
run_forced_fail_test,
12-
Config,
1312
MutmutProgrammaticFailException,
1413
CatchOutput,
1514
)
@@ -153,7 +152,12 @@ def mutated_module(source: str) -> str:
153152
('def foo(a, *args, **kwargs): pass', []),
154153
('import foo', []),
155154
('isinstance(a, b)', []),
156-
('len(a)', []),
155+
('len(a)', ['sum(a)', 'len(None)']),
156+
('sum(a)', ['len(a)', 'sum(None)']),
157+
('chr(65)', ['chr(65 + 1)', 'chr(None)']),
158+
("ord('a')", ["ord('a') + 1", 'ord(None)']),
159+
('enum.Enum', ['enum.StrEnum', 'enum.IntEnum']),
160+
('enum.StrEnum', ['enum.Enum']),
157161
('deepcopy(obj)', ['copy(obj)', 'deepcopy(None)']),
158162
]
159163
)
@@ -667,3 +671,43 @@ def add(self, *args, **kwargs):
667671
xǁAdderǁadd__mutmut_orig.__name__ = 'xǁAdderǁadd'
668672
669673
print(Adder(1).add(2))"""
674+
675+
# (source snippet, mutants that must be among the generated ones)
_REGEX_MUTATION_CASES = [
    (
        "re.compile(r'\\d+')",
        [
            "re.compile(r'\\d*')",  # + → *
            "re.compile(r'[0-9]+')",  # \d → [0-9]
        ],
    ),
    (
        "re.search(r'[abc]+')",
        [
            "re.search(r'[abc]*')",  # + → *
            "re.search(r'[cba]+')",  # [abc] → [cba]
        ],
    ),
    (
        "re.match(r'\\w{1,}')",
        [
            "re.match(r'[A-Za-z0-9_]{1,}')",  # \w → [A-Za-z0-9_]
        ],
    ),
    (
        "re.match(r'foo?')",
        [
            "re.match(r'foo*')",  # ? → *
            "re.match(r'foo{0,1}')",  # ? → {0,1}
        ],
    ),
    (
        "re.search(r'bar{0,1}')",
        [
            "re.search(r'bar?')",  # {0,1} → ?
        ],
    ),
]


@pytest.mark.parametrize("original, want_patterns", _REGEX_MUTATION_CASES)
def test_regex_mutations_loose(original, want_patterns):
    """Each wanted pattern must be among the mutants; extra mutants are fine."""
    produced = mutants_for_source(original)
    for expected in want_patterns:
        assert expected in produced, f"expected {expected!r} in {produced}"

0 commit comments

Comments
 (0)