1- """This module contains the mutations for indidvidual nodes, e.g. replacing a != b with a == b."""
1+ """This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
2+ import re
3+ """This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
4+ import re
25from typing import Any , Union
36from collections .abc import Callable , Iterable , Sequence
47import libcst as cst
@@ -107,11 +110,47 @@ def operator_keywords(
107110
108111
def operator_name(node: cst.Name) -> Iterable[cst.CSTNode]:
    """Yield a mutant of ``node`` with its identifier swapped for a counterpart.

    Only identifiers listed in the mapping below are mutated; any other name
    yields nothing. At most one mutant is produced per node.

    :param node: the name node to inspect.
    :return: an iterable with zero or one replacement nodes.
    """
    # NOTE: the opening line of this dict must appear exactly once; a repeated
    # ``name_mappings = {`` line makes the whole module unparsable.
    name_mappings = {
        "True": "False",
        "False": "True",
        "deepcopy": "copy",
        "copy": "deepcopy",

        # common aggregates
        "len": "sum",
        "sum": "len",
        "min": "max",
        "max": "min",

        # boolean checks
        "all": "any",
        "any": "all",

        # ordering
        "sorted": "reversed",
        "reversed": "sorted",

        # numeric types
        "int": "float",
        "float": "int",

        # byte types
        "bytes": "bytearray",
        "bytearray": "bytes",

        # mapping/filtering
        "map": "filter",
        "filter": "map",

        # enums
        "Enum": "StrEnum",
        "StrEnum": "Enum",
        "IntEnum": "Enum",

        # dict <-> set is intentionally left disabled (flagged as risky by the
        # original author):
        # "dict": "set",
        # "set": "dict",
    }
    if node.value in name_mappings:
        yield node.with_changes(value=name_mappings[node.value])
@@ -186,6 +225,123 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
186225 for i in range (len (node .cases )):
187226 yield node .with_changes (cases = [* node .cases [:i ], * node .cases [i + 1 :]])
188227
228+ def _mutate_regex (inner : str ) -> list [str ]:
229+ """
230+ Generate ‘nasty’ variants of a regex body:
231+ - swap + ↔ * and ? ↔ *
232+ - turn `{0,1}` ↔ ?
233+ - turn `\d` ↔ `[0-9]` and `\w` ↔ `[A-Za-z0-9_]`
234+ - reverse the contents of any simple [...] class
235+ """
236+ muts : list [str ] = []
237+ # + <-> *
238+ if "+" in inner :
239+ muts .append (inner .replace ("+" , "*" ))
240+ if "*" in inner :
241+ muts .append (inner .replace ("*" , "+" ))
242+ # ? <-> *
243+ if "?" in inner :
244+ muts .append (inner .replace ("?" , "*" ))
245+ if "*" in inner :
246+ muts .append (inner .replace ("*" , "?" ))
247+ # {0,1} -> ? and ? -> {0,1}
248+ if re .search (r"\{0,1\}" , inner ):
249+ muts .append (re .sub (r"\{0,1\}" , "?" , inner ))
250+ if "?" in inner :
251+ muts .append (re .sub (r"\?" , "{0,1}" , inner ))
252+ # digit class ↔ shorthand
253+ if "\\ d" in inner :
254+ muts .append (inner .replace ("\\ d" , "[0-9]" ))
255+ if "[0-9]" in inner :
256+ muts .append (inner .replace ("[0-9]" , "\\ d" ))
257+ # word class ↔ shorthand
258+ if "\\ w" in inner :
259+ muts .append (inner .replace ("\\ w" , "[A-Za-z0-9_]" ))
260+ if "[A-Za-z0-9_]" in inner :
261+ muts .append (inner .replace ("[A-Za-z0-9_]" , "\\ w" ))
262+ # reverse simple character classes
263+ for mobj in re .finditer (r"\[([^\]]+)\]" , inner ):
264+ content = mobj .group (1 )
265+ rev = content [::- 1 ]
266+ orig = f"[{ content } ]"
267+ mutated = f"[{ rev } ]"
268+ muts .append (inner .replace (orig , mutated ))
269+ # dedupe, preserve order
270+ return list (dict .fromkeys (muts ))
271+
272+
def operator_regex(node: cst.Call) -> Iterable[cst.CSTNode]:
    """Yield mutants of ``re.<func>(<string literal>, ...)`` calls.

    Handles ``re.compile``, ``re.match``, ``re.search``, ``re.fullmatch`` and
    ``re.findall`` when the first argument is a plain string literal. The
    literal's body is passed to ``_mutate_regex`` and one mutated call is
    yielded per returned pattern; the string prefix, quote style and all other
    arguments are kept as written.

    :param node: the call node to inspect.
    :return: an iterable of mutated call nodes (possibly empty).
    """
    regex_functions = ("compile", "match", "search", "fullmatch", "findall")
    regex_call = m.Call(
        func=m.Attribute(
            value=m.Name("re"),
            attr=m.MatchIfTrue(lambda target: target.value in regex_functions),
        ),
        # A bare one-element list would only match calls with exactly one
        # argument; ZeroOrMore lets re.match(pattern, string, ...) match too.
        args=[m.Arg(value=m.SimpleString()), m.ZeroOrMore()],
    )
    if not m.matches(node, regex_call):
        return

    arg = node.args[0]
    # The first argument is only the pattern when it is positional or is
    # explicitly passed as ``pattern=``; skip ``*"..."`` and other keywords.
    if arg.star or (arg.keyword is not None and arg.keyword.value != "pattern"):
        return

    lit = cst.ensure_type(arg.value, cst.SimpleString)
    # Let libcst split the literal so that every prefix (r, b, rb, u, ...) and
    # triple quotes are handled; slicing ``value`` keeps the prefix's case.
    quote = lit.quote
    opening = lit.value[: len(lit.prefix) + len(quote)]

    for mutated_inner in _mutate_regex(lit.raw_value):
        new_lit = lit.with_changes(value=f"{opening}{mutated_inner}{quote}")
        new_arg = arg.with_changes(value=new_lit)
        yield node.with_changes(args=[new_arg, *node.args[1:]])
310+
311+
def operator_chr_ord(node: cst.Call) -> Iterable[cst.CSTNode]:
    """Nudge ``chr``/``ord`` calls by one instead of swapping their names.

    - ``chr(x, ...)`` becomes ``chr(x + 1, ...)``
    - ``ord(x)`` becomes ``(ord(x) + 1)``

    Calls to anything other than a bare ``chr``/``ord`` name, or calls without
    arguments, yield nothing.

    :param node: the call node to inspect.
    :return: an iterable with zero or one replacement nodes.
    """
    if not isinstance(node.func, cst.Name) or not node.args:
        return

    name = node.func.value
    first_arg = node.args[0]
    if name == "chr":
        if first_arg.star:
            # chr(*xs) has no single code-point expression to increment.
            return
        incremented = cst.BinaryOperation(
            left=first_arg.value,
            operator=cst.Add(),
            right=cst.Integer("1"),
        )
        yield node.with_changes(args=[first_arg.with_changes(value=incremented), *node.args[1:]])
    elif name == "ord":
        # Parenthesize the replacement: the call may sit inside a larger
        # expression (``ord(c) * 2``, ``ord(c).bit_length()``) where a bare
        # ``ord(c) + 1`` would regroup or produce invalid source.
        yield cst.BinaryOperation(
            left=node,
            operator=cst.Add(),
            right=cst.Integer("1"),
            lpar=[cst.LeftParen()],
            rpar=[cst.RightParen()],
        )
327+
328+
def operator_enum_attribute(node: cst.Attribute) -> Iterable[cst.CSTNode]:
    """Swap the enum base class in ``enum.<Base>`` attribute accesses.

    ``enum.Enum`` yields ``enum.StrEnum`` and then ``enum.IntEnum``;
    ``enum.StrEnum`` and ``enum.IntEnum`` each yield ``enum.Enum``. Any other
    attribute access yields nothing.
    """
    replacements = {
        "Enum": ("StrEnum", "IntEnum"),
        "StrEnum": ("Enum",),
        "IntEnum": ("Enum",),
    }
    base = node.attr
    if isinstance(base, cst.Name) and m.matches(node.value, m.Name("enum")):
        for replacement in replacements.get(base.value, ()):
            yield node.with_changes(attr=cst.Name(replacement))
343+
344+
189345# Operators that should be called on specific node types
190346mutation_operators : OPERATORS_TYPE = [
191347 (cst .BaseNumber , operator_number ),
@@ -197,6 +353,10 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
197353 (cst .UnaryOperation , operator_remove_unary_ops ),
198354 (cst .Call , operator_dict_arguments ),
199355 (cst .Call , operator_arg_removal ),
356+ (cst .Call , operator_chr_ord ),
357+ (cst .Call , operator_regex ),
358+ (cst .Call , operator_chr_ord ),
359+ (cst .Attribute , operator_enum_attribute ),
200360 (cst .Lambda , operator_lambda ),
201361 (cst .CSTNode , operator_keywords ),
202362 (cst .CSTNode , operator_swap_op ),
@@ -212,5 +372,3 @@ def _simple_mutation_mapping(
212372 if mutated_node_type :
213373 yield mutated_node_type ()
214374
215-
216- # TODO: detect regexes and mutate them in nasty ways? Maybe mutate all strings as if they are regexes
0 commit comments