1- """This module contains the mutations for indidvidual nodes, e.g. replacing a != b with a == b."""
1+ """This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
2+ import re
23from typing import Any , Union
34from collections .abc import Callable , Iterable , Sequence
45import libcst as cst
@@ -107,11 +108,46 @@ def operator_keywords(
107108
108109
def operator_name(node: cst.Name) -> Iterable[cst.CSTNode]:
    """Yield a copy of *node* with its identifier swapped for a related one.

    Only identifiers listed in the replacement table below are mutated; for
    any other name nothing is yielded.
    """
    # Names that are exchanged in both directions.
    two_way_swaps = (
        ("True", "False"),
        ("deepcopy", "copy"),
        # common aggregates
        ("len", "sum"),
        ("min", "max"),
        # boolean checks
        ("all", "any"),
        # ordering
        ("sorted", "reversed"),
        # numeric types
        ("int", "float"),
        # byte types
        ("bytes", "bytearray"),
        # (optionally) mapping/filtering
        ("map", "filter"),
        # enums
        ("Enum", "StrEnum"),
        # dict <-> set is deliberately left out for now:
        # ("dict", "set"),
    )

    replacements: dict[str, str] = {}
    for first, second in two_way_swaps:
        replacements[first] = second
        replacements[second] = first
    # One-way only: nothing in the table maps back to IntEnum.
    replacements["IntEnum"] = "Enum"

    swapped = replacements.get(node.value)
    if swapped is not None:
        yield node.with_changes(value=swapped)
@@ -186,6 +222,123 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
186222 for i in range (len (node .cases )):
187223 yield node .with_changes (cases = [* node .cases [:i ], * node .cases [i + 1 :]])
188224
def _is_valid_char_class(content: str) -> bool:
    """Return True if ``[content]`` compiles as a regex character class."""
    import warnings  # local import: only this validity probe needs it

    with warnings.catch_warnings():
        # Unusual but legal classes (e.g. containing "[") only raise a
        # FutureWarning; that must neither leak out nor count as invalid.
        warnings.simplefilter("ignore")
        try:
            re.compile(f"[{content}]")
        except re.error:
            return False
    return True


def _mutate_regex(inner: str) -> list[str]:
    r"""Generate 'nasty' variants of a regex body.

    The following textual mutations are attempted, in this order:

    - swap ``+`` <-> ``*`` and ``?`` <-> ``*``
    - turn ``{0,1}`` <-> ``?``
    - turn ``\d`` <-> ``[0-9]`` and ``\w`` <-> ``[A-Za-z0-9_]``
    - reverse the contents of any simple ``[...]`` class

    Args:
        inner: The regex text without string prefix or quotes.

    Returns:
        The distinct mutated patterns in generation order. The input itself
        is never part of the result.
    """
    # (needle, replacement) pairs; each applies to every occurrence at once.
    substitutions = (
        ("+", "*"),
        ("*", "+"),
        ("?", "*"),
        ("*", "?"),
        ("{0,1}", "?"),
        ("?", "{0,1}"),
        ("\\d", "[0-9]"),
        ("[0-9]", "\\d"),
        ("\\w", "[A-Za-z0-9_]"),
        ("[A-Za-z0-9_]", "\\w"),
    )
    muts: list[str] = [
        inner.replace(needle, replacement)
        for needle, replacement in substitutions
        if needle in inner
    ]

    # Reverse simple character classes.
    for class_match in re.finditer(r"\[([^\]]+)\]", inner):
        content = class_match.group(1)
        if "\\" in content:
            # Not a "simple" class: reversing character-wise would tear the
            # escape sequence apart (e.g. [\d] -> [d\]).
            continue
        flipped = content[::-1]
        if not _is_valid_char_class(flipped):
            # Reversing a range such as a-z gives z-a, which `re` rejects.
            continue
        muts.append(inner.replace(f"[{content}]", f"[{flipped}]"))

    # Dedupe preserving order, and drop no-op results (e.g. the palindromic
    # class "[a]") because they are not mutants at all.
    return [mutant for mutant in dict.fromkeys(muts) if mutant != inner]
268+
269+
def operator_regex(node: cst.Call) -> Iterable[cst.CSTNode]:
    """Mutate the regex pattern passed to selected ``re`` functions.

    Handles calls of the form ``re.compile(...)``, ``re.match(...)``,
    ``re.search(...)``, ``re.fullmatch(...)`` and ``re.findall(...)`` whose
    first argument is a plain string literal. Each pattern variant produced
    by ``_mutate_regex`` yields one mutated call; the remaining arguments are
    kept as they are.
    """
    is_regex_call = m.matches(
        node,
        m.Call(
            func=m.Attribute(
                value=m.Name("re"),
                attr=m.MatchIfTrue(
                    lambda t: t.value
                    in ("compile", "match", "search", "fullmatch", "findall")
                ),
            ),
            # The pattern has to be the first argument. ZeroOrMore lets calls
            # with extra arguments (subject string, flags, ...) match as well;
            # a bare one-element list would only match single-argument calls.
            args=[m.Arg(value=m.SimpleString()), m.ZeroOrMore()],
        ),
    )
    if not is_regex_call:
        return

    arg = node.args[0]
    lit: cst.SimpleString = arg.value  # type: ignore

    # Let libcst split the literal so every prefix (r, b, rb, u, ...) and
    # triple quotes are handled. `prefix` is lower-cased by libcst, so the
    # opening text is sliced from the source to keep its original casing.
    quote = lit.quote
    opening = lit.value[: len(lit.prefix) + len(quote)]

    for mutated_inner in _mutate_regex(lit.raw_value):
        new_lit = lit.with_changes(value=f"{opening}{mutated_inner}{quote}")
        new_arg = arg.with_changes(value=new_lit)
        yield node.with_changes(args=[new_arg, *node.args[1:]])
307+
308+
def operator_chr_ord(node: cst.Call) -> Iterable[cst.CSTNode]:
    """Adjust chr/ord calls slightly instead of swapping names.

    - ``chr(x)`` becomes ``chr(x + 1)`` (later arguments are kept).
    - ``ord(x)`` becomes ``(ord(x) + 1)``.

    Calls whose function is not a bare name, or that have no arguments,
    yield nothing.
    """
    if not isinstance(node.func, cst.Name) or not node.args:
        return

    name = node.func.value
    first_arg = node.args[0]

    if name == "chr":
        if first_arg.star:
            # chr(*xs): "+ 1" would apply to the unpacked iterable, not to a
            # code point, so there is nothing sensible to mutate.
            return
        operand = first_arg.value
        binds_tightly = isinstance(
            operand,
            (cst.Name, cst.BaseNumber, cst.Call, cst.Attribute, cst.Subscript),
        )
        if not binds_tightly and not operand.lpar:
            # Keep e.g. `a if c else b` together: without parentheses the
            # "+ 1" would attach to only part of the original argument.
            operand = operand.with_changes(
                lpar=[cst.LeftParen()], rpar=[cst.RightParen()]
            )
        incr = cst.BinaryOperation(
            left=operand,
            operator=cst.Add(),
            right=cst.Integer("1"),
        )
        yield node.with_changes(
            args=[first_arg.with_changes(value=incr), *node.args[1:]]
        )
    elif name == "ord":
        # The result replaces the call wherever it stood, so parenthesise it:
        # otherwise `ord(c) * 2` would turn into `ord(c) + 1 * 2` and
        # `ord(c).bit_length()` into unparsable code.
        yield cst.BinaryOperation(
            left=node,
            operator=cst.Add(),
            right=cst.Integer("1"),
            lpar=[cst.LeftParen()],
            rpar=[cst.RightParen()],
        )
324+
325+
def operator_enum_attribute(node: cst.Attribute) -> Iterable[cst.CSTNode]:
    """Swap common Enum base classes accessed as ``enum.<Class>``.

    ``enum.Enum`` yields ``enum.StrEnum`` and then ``enum.IntEnum``;
    ``enum.StrEnum`` and ``enum.IntEnum`` each yield ``enum.Enum``. Any other
    attribute access yields nothing.
    """
    # Replacement class names per original class, in the order they are yielded.
    alternatives = {
        "Enum": ("StrEnum", "IntEnum"),
        "StrEnum": ("Enum",),
        "IntEnum": ("Enum",),
    }

    attr = node.attr
    on_enum_module = m.matches(node.value, m.Name("enum"))
    if not (on_enum_module and isinstance(attr, cst.Name)):
        return

    for class_name in alternatives.get(attr.value, ()):
        yield node.with_changes(attr=cst.Name(class_name))
340+
341+
189342# Operators that should be called on specific node types
190343mutation_operators : OPERATORS_TYPE = [
191344 (cst .BaseNumber , operator_number ),
@@ -197,6 +350,10 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
197350 (cst .UnaryOperation , operator_remove_unary_ops ),
198351 (cst .Call , operator_dict_arguments ),
199352 (cst .Call , operator_arg_removal ),
353+ (cst .Call , operator_chr_ord ),
354+ (cst .Call , operator_regex ),
355+ (cst .Call , operator_chr_ord ),
356+ (cst .Attribute , operator_enum_attribute ),
200357 (cst .Lambda , operator_lambda ),
201358 (cst .CSTNode , operator_keywords ),
202359 (cst .CSTNode , operator_swap_op ),
@@ -212,5 +369,3 @@ def _simple_mutation_mapping(
212369 if mutated_node_type :
213370 yield mutated_node_type ()
214371
215-
216- # TODO: detect regexes and mutate them in nasty ways? Maybe mutate all strings as if they are regexes
0 commit comments