1- """This module contains the mutations for indidvidual nodes, e.g. replacing a != b with a == b."""
1+ """This module contains the mutations for individual nodes, e.g. replacing a != b with a == b."""
2+ import re
23from typing import Any , Union
34from collections .abc import Callable , Iterable , Sequence
45import libcst as cst
@@ -107,11 +108,57 @@ def operator_keywords(
107108
108109
def operator_name(node: cst.Name) -> Iterable[cst.CSTNode]:
    """Yield ``node`` renamed to its counterpart when its identifier has one.

    Every pair listed below is applied in both directions (``min`` becomes
    ``max`` and ``max`` becomes ``min``).  An identifier that appears in no
    pair produces no mutant.
    """
    counterparts = (
        ("True", "False"),
        ("deepcopy", "copy"),
        # common aggregates
        ("len", "sum"),
        ("min", "max"),
        # boolean checks
        ("all", "any"),
        # ordering
        ("sorted", "reversed"),
        # repr vs. str
        ("str", "repr"),
        # numeric types
        ("int", "float"),
        # sequences vs. tuples
        ("list", "tuple"),
        # set types
        ("set", "frozenset"),
        # byte types
        ("bytes", "bytearray"),
        # (optionally) mapping/filtering
        ("map", "filter"),
        # character/ordinal conversions
        ("chr", "ord"),
        # A dict <-> set swap was considered but is left disabled.
    )

    # Expand each pair into a lookup that works in both directions.
    swap_for: dict[str, str] = {}
    for left, right in counterparts:
        swap_for[left] = right
        swap_for[right] = left

    replacement = swap_for.get(node.value)
    if replacement is not None:
        yield node.with_changes(value=replacement)
@@ -186,6 +233,85 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
186233 for i in range (len (node .cases )):
187234 yield node .with_changes (cases = [* node .cases [:i ], * node .cases [i + 1 :]])
188235
def _mutate_regex(inner: str) -> list[str]:
    r"""Return 'nasty' textual variants of a regex body.

    ``inner`` is the pattern text exactly as written between the string
    quotes.  Each rewrite is a plain text substitution applied to every
    occurrence at once; the pattern is not parsed, so escaped characters
    such as ``\+`` are rewritten as well.

    Rewrites, in the order they are tried:

    - swap ``+`` <-> ``*``
    - swap ``{1,}`` <-> ``+``
    - swap ``\d`` <-> ``[0-9]`` and ``\w`` <-> ``[A-Za-z0-9_]``
    - reverse the contents of each simple ``[...]`` class

    Returns:
        The distinct variants in generation order.  A variant that is
        textually identical to ``inner`` is never returned, because it would
        not be a mutation at all.
    """
    candidates: list[str] = []

    # Literal substring swaps; a swap only fires when its source text occurs.
    swaps = (
        ("+", "*"),
        ("*", "+"),
        ("{1,}", "+"),
        ("+", "{1,}"),
        ("\\d", "[0-9]"),
        ("[0-9]", "\\d"),
        ("\\w", "[A-Za-z0-9_]"),
        ("[A-Za-z0-9_]", "\\w"),
    )
    for old, new in swaps:
        if old in inner:
            candidates.append(inner.replace(old, new))

    # Reverse simple character classes.
    # NOTE: reversing a range such as ``[a-z]`` gives ``[z-a]``, which ``re``
    # rejects as a bad character range; such variants are still returned.
    for match in re.finditer(r"\[([^\]]+)\]", inner):
        content = match.group(1)
        candidates.append(inner.replace(f"[{content}]", f"[{content[::-1]}]"))

    # Dedupe while preserving order, and drop no-op variants: reversing a
    # single-character or palindromic class reproduces the input unchanged.
    return [variant for variant in dict.fromkeys(candidates) if variant != inner]
274+
275+
def operator_regex(node: cst.Call) -> Iterable[cst.CSTNode]:
    """Yield mutants of ``re.<func>(<string literal>, ...)`` with a mutated pattern.

    Only calls spelled ``re.compile``, ``re.match``, ``re.search``,
    ``re.fullmatch`` or ``re.findall`` whose first argument is a simple
    string literal are considered.  The text between the literal's quotes is
    passed to ``_mutate_regex`` and one mutated call is yielded per variant;
    the string prefix, the quote style and all remaining arguments are kept
    as they are.

    Args:
        node: The call node to inspect.

    Yields:
        A copy of ``node`` for each pattern variant.  Nothing is yielded when
        the call does not match or no variant exists.
    """
    if not m.matches(
        node,
        m.Call(
            func=m.Attribute(
                value=m.Name("re"),
                attr=m.MatchIfTrue(
                    lambda t: t.value
                    in ("compile", "match", "search", "fullmatch", "findall")
                ),
            ),
            # The pattern literal must come first.  ZeroOrMore lets the call
            # carry further arguments (subject string, flags); without it
            # only single-argument calls would match.
            args=[m.Arg(value=m.SimpleString()), m.ZeroOrMore()],
        ),
    ):
        return

    arg = node.args[0]
    lit: cst.SimpleString = arg.value  # type: ignore
    raw = lit.value  # full source text of the literal, e.g. r'\d+\w*'

    # Let libcst split the literal so that every prefix (r, b, rb, u, ...)
    # and triple quotes are handled.  The prefix is sliced from the source
    # text so its original spelling is preserved.
    prefix = raw[: len(lit.prefix)]
    quote = lit.quote
    inner = lit.raw_value

    for mutated_inner in _mutate_regex(inner):
        new_lit = lit.with_changes(value=f"{prefix}{quote}{mutated_inner}{quote}")
        new_arg = arg.with_changes(value=new_lit)
        yield node.with_changes(args=[new_arg, *node.args[1:]])
313+
314+
189315# Operators that should be called on specific node types
190316mutation_operators : OPERATORS_TYPE = [
191317 (cst .BaseNumber , operator_number ),
@@ -197,6 +323,7 @@ def operator_match(node: cst.Match) -> Iterable[cst.CSTNode]:
197323 (cst .UnaryOperation , operator_remove_unary_ops ),
198324 (cst .Call , operator_dict_arguments ),
199325 (cst .Call , operator_arg_removal ),
326+ (cst .Call , operator_regex ),
200327 (cst .Lambda , operator_lambda ),
201328 (cst .CSTNode , operator_keywords ),
202329 (cst .CSTNode , operator_swap_op ),
@@ -212,5 +339,3 @@ def _simple_mutation_mapping(
212339 if mutated_node_type :
213340 yield mutated_node_type ()
214341
215-
216- # TODO: detect regexes and mutate them in nasty ways? Maybe mutate all strings as if they are regexes
0 commit comments