Skip to content

Commit 7bf9bc9

Browse files
Python: Fix various parser issues (#6547)
* Python: Parser adaptability

* Python: Add support for PEP 695 type parameters and subscript decorators

  - Implement parsing and printing for Python 3.12+ type parameters (TypeVar, TypeVarTuple, ParamSpec) in class and function definitions
  - Add support for subscript decorators (e.g., @[property][0])
  - Fix f-string debug expression trailing newline issue
  - Add comprehensive tests for type parameters and decorators

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Python: Commit Python parser tests

* Python: Add type parameters support to TypeAlias

  Add support for PEP 695 type parameters on type alias statements (e.g., `type Foo[T] = list[T]`).

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Fix more parser issues

* Fix statement padding

* Python: Handle UTF-8 BOM in source files

  - Detect and strip BOM character (U+FEFF) before parsing
  - Store BOM presence in CompilationUnit.charset_bom_marked
  - Output BOM when printing if file originally had one

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Python: Fix multi-line tuple unpacking in for loops

  The parser was incorrectly determining whether parentheses belong to a tuple or its first element. The heuristic only checked column offset, but for multi-line tuples like:

      for (
          window,
          in_entry,
      ) in buf_out:

  The tuple and first element have the same column offset (4) but are on different lines. The fix now checks both line AND column to correctly identify that the '(' belongs to the tuple itself.

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* Fix more parser problems

* Fix more parser problems

* Fix more parser problems

* Yet more parser fixes

* Yet more parser fixes

* Unicode fixes

* pep646

---------

Co-authored-by: Claude Opus 4.5 <[email protected]>
1 parent 16f358f commit 7bf9bc9

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+4483
-110
lines changed

rewrite-python/rewrite/src/rewrite/java/support_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def prefix(self) -> Space:
2525

2626
def is_acceptable(self, v: TreeVisitor[Any, P], p: P) -> bool:
2727
from .visitor import JavaVisitor
28-
return isinstance(v, JavaVisitor)
28+
return v.is_adaptable_to(JavaVisitor)
2929

3030
def accept(self, v: TreeVisitor[Any, P], p: P) -> Optional[Any]:
3131
from .visitor import JavaVisitor

rewrite-python/rewrite/src/rewrite/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def printer(self, cursor: Cursor) -> TreeVisitor[Tree, PrintOutputCapture[P]]:
144144
return PrinterFactory.current().create_printer(cursor)
145145

146146
def is_acceptable(self, v: TreeVisitor[Any, P], p: P) -> bool:
147-
return isinstance(v, ParseErrorVisitor)
147+
return v.is_adaptable_to(ParseErrorVisitor)
148148

149149
def accept(self, v: TreeVisitor[Any, P], p: P) -> Optional[Any]:
150150
return cast(ParseErrorVisitor, v).visit_parse_error(self, p)

rewrite-python/rewrite/src/rewrite/python/_parser_visitor.py

Lines changed: 691 additions & 105 deletions
Large diffs are not rendered by default.

rewrite-python/rewrite/src/rewrite/python/printer.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,9 @@ def _visit_container(
375375
def visit_compilation_unit(self, cu: 'py.CompilationUnit', p: PrintOutputCapture) -> J:
376376
"""Visit a Python compilation unit."""
377377
from rewrite.java.tree import Import
378+
# Output UTF-8 BOM if the original file had one
379+
if cu.charset_bom_marked:
380+
p.append('\ufeff')
378381
self._before_syntax(cu, Space.Location.COMPILATION_UNIT_PREFIX, p)
379382

380383
# Print imports
@@ -802,6 +805,8 @@ def visit_type_alias(self, alias: 'py.TypeAlias', p: PrintOutputCapture) -> J:
802805
self._before_syntax(alias, PySpace.Location.UNION_TYPE_PREFIX, p)
803806
p.append("type")
804807
self.visit(alias.name, p)
808+
# Visit type parameters (Python 3.12+ PEP 695)
809+
self._visit_container("[", alias.padding.type_parameters, JContainer.Location.TYPE_PARAMETERS, ",", "]", p)
805810
self._visit_left_padded("=", alias.padding.value, PyLeftPadded.Location.TYPE_ALIAS_VALUE, p)
806811
self._after_syntax(alias, p)
807812
return alias
@@ -973,6 +978,8 @@ def _visit_java(self, tree: J, p: PrintOutputCapture) -> Optional[J]:
973978
return self.visit_ternary(tree, p)
974979
elif isinstance(tree, j.Throw):
975980
return self.visit_throw(tree, p)
981+
elif isinstance(tree, j.TypeParameter):
982+
return self.visit_type_parameter(tree, p)
976983
elif isinstance(tree, j.Try):
977984
return self.visit_try(tree, p)
978985
elif isinstance(tree, j.Try.Resource):
@@ -1343,6 +1350,9 @@ def visit_class_declaration(self, class_decl: 'j.ClassDeclaration', p: PrintOutp
13431350
p.append("class")
13441351
self.visit(class_decl.name, p)
13451352

1353+
# Visit type parameters (Python 3.12+)
1354+
self._visit_container("[", class_decl.padding.type_parameters, JContainer.Location.TYPE_PARAMETERS, ",", "]", p)
1355+
13461356
# Visit implements (base classes in Python)
13471357
if class_decl.padding.implements:
13481358
omit_parens = class_decl.padding.implements.markers.find_first(OmitParentheses)
@@ -1531,6 +1541,8 @@ def visit_method_declaration(self, method: 'j.MethodDeclaration', p: PrintOutput
15311541
self.visit_modifier(mod, p)
15321542

15331543
self.visit(method.name, p)
1544+
# Visit type parameters (Python 3.12+)
1545+
self._visit_container("[", method.padding.type_parameters, JContainer.Location.TYPE_PARAMETERS, ",", "]", p)
15341546
self._visit_container("(", method.padding.parameters, JContainer.Location.METHOD_DECLARATION_PARAMETERS, ",", ")", p)
15351547
self.visit(method.return_type_expression, p)
15361548
self.visit(method.body, p)
@@ -1571,6 +1583,9 @@ def visit_modifier(self, mod: 'j.Modifier', p: PrintOutputCapture) -> J:
15711583
keyword = "def"
15721584
elif mod.type == Modifier.Type.Async:
15731585
keyword = "async"
1586+
elif mod.type == Modifier.Type.LanguageExtension:
1587+
# Use the keyword directly (for * and ** in type parameters)
1588+
keyword = mod.keyword
15741589

15751590
if keyword:
15761591
for annotation in mod.annotations:
@@ -1640,6 +1655,18 @@ def visit_throw(self, throw: 'j.Throw', p: PrintOutputCapture) -> J:
16401655
self._after_syntax(throw, p)
16411656
return throw
16421657

1658+
def visit_type_parameter(self, type_param: 'j.TypeParameter', p: PrintOutputCapture) -> J:
1659+
"""Visit a type parameter (Python 3.12+ PEP 695)."""
1660+
self._before_syntax(type_param, Space.Location.TYPE_PARAMETERS_PREFIX, p)
1661+
# Visit modifiers (for * and ** prefixes)
1662+
for mod in type_param.modifiers:
1663+
self.visit(mod, p)
1664+
self.visit(type_param.name, p)
1665+
# Visit bounds (for T: int style bounds in Python)
1666+
self._visit_container(":", type_param.padding.bounds, JContainer.Location.TYPE_BOUNDS, ",", "", p)
1667+
self._after_syntax(type_param, p)
1668+
return type_param
1669+
16431670
def visit_try(self, try_: 'j.Try', p: PrintOutputCapture) -> J:
16441671
"""Visit a try statement (or with statement in Python)."""
16451672
from rewrite.python import tree as py
@@ -1794,11 +1821,15 @@ def visit_variable_declarations(self, multi_variable: 'j.VariableDeclarations',
17941821

17951822
# Visit variables
17961823
nodes = multi_variable.padding.variables
1824+
is_kwonly_marker = multi_variable.markers.find_first(KeywordOnlyArguments) is not None
17971825
for i, node in enumerate(nodes):
17981826
# Set cursor for context in visit_variable
17991827
self.set_cursor(Cursor(self.get_cursor(), node))
18001828
self.visit(node.element, p)
18011829
self._visit_markers(node.markers, p)
1830+
# For keyword-only args marker (bare *), print the after space before comma
1831+
if is_kwonly_marker:
1832+
self._visit_space(node.after, JRightPadded.Location.NAMED_VARIABLE.after_location, p)
18021833
if i < len(nodes) - 1:
18031834
p.append(",")
18041835
# Restore cursor

rewrite-python/rewrite/src/rewrite/python/support_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def accept_python(self, v: 'PythonVisitor[P]', p: P) -> Optional['J']:
2424

2525
def is_acceptable(self, v: TreeVisitor[Any, P], p: P) -> bool:
2626
from .visitor import PythonVisitor
27-
return isinstance(v, PythonVisitor)
27+
return v.is_adaptable_to(PythonVisitor)
2828

2929

3030
class PySpace:

rewrite-python/rewrite/src/rewrite/python/tree.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,13 @@ def name(self) -> Identifier:
14861486
return self._name
14871487

14881488

1489+
_type_parameters: Optional[JContainer[j.TypeParameter]]
1490+
1491+
@property
1492+
def type_parameters(self) -> Optional[List[j.TypeParameter]]:
1493+
return self._type_parameters.elements if self._type_parameters else None
1494+
1495+
14891496
_value: JLeftPadded[J]
14901497

14911498
@property
@@ -1504,6 +1511,10 @@ def type(self) -> Optional[JavaType]:
15041511
class PaddingHelper:
15051512
_t: TypeAlias
15061513

1514+
@property
1515+
def type_parameters(self) -> Optional[JContainer[j.TypeParameter]]:
1516+
return self._t._type_parameters
1517+
15071518
@property
15081519
def value(self) -> JLeftPadded[J]:
15091520
return self._t._value

rewrite-python/rewrite/src/rewrite/rpc/server.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,11 @@ def parse_python_source(source: str, path: str = "<unknown>") -> dict:
207207
from rewrite.python._parser_visitor import ParserVisitor
208208
from rewrite import random_id, Markers
209209

210+
# Strip BOM before parsing (ParserVisitor handles it internally but ast.parse doesn't)
211+
source_for_ast = source[1:] if source.startswith('\ufeff') else source
212+
210213
# Parse using Python AST
211-
tree = ast.parse(source, path)
214+
tree = ast.parse(source_for_ast, path)
212215

213216
# Convert to OpenRewrite LST
214217
cu = ParserVisitor(source).visit(tree)

rewrite-python/rewrite/src/rewrite/test/rewrite_test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,9 @@ def _parse_python(self, source: str, source_path: Path) -> CompilationUnit:
218218
from rewrite.python._parser_visitor import ParserVisitor
219219

220220
visitor = ParserVisitor(source)
221-
tree = ast.parse(source)
221+
# Strip BOM before passing to ast.parse (ParserVisitor does this internally)
222+
source_for_ast = source[1:] if source.startswith('\ufeff') else source
223+
tree = ast.parse(source_for_ast)
222224
cu = visitor.visit_Module(tree)
223225
return cu.replace(source_path=source_path)
224226

rewrite-python/rewrite/src/rewrite/visitor.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,30 @@ def visit_markers(self, markers: Markers, p: P) -> Markers:
177177
def visit_marker(self, marker: Marker, p: P) -> Marker:
178178
return marker
179179

180+
def stop_after_pre_visit(self) -> None:
181+
"""
182+
Stop visiting after pre_visit returns, preventing accept() and post_visit() from being called.
183+
Call this in pre_visit when you only want to process at a high level without traversing children.
184+
"""
185+
self._cursor.put_message("STOP_AFTER_PRE_VISIT", True)
186+
187+
def is_adaptable_to(self, adapt_to: Type['TreeVisitor[Any, Any]']) -> bool:
188+
"""
189+
Check if this visitor can be adapted to the given visitor type.
190+
191+
A visitor is adaptable if:
192+
1. It is already an instance of the target type, OR
193+
2. It is a base TreeVisitor (not a language-specific visitor)
194+
195+
This is a simplified implementation. Full adaptation support
196+
would check tree type hierarchies like Java does.
197+
"""
198+
if isinstance(self, adapt_to):
199+
return True
200+
# Base TreeVisitor is adaptable to any visitor type
201+
# (its tree type is Tree, which is a supertype of all tree types)
202+
return type(self).__mro__[1] == TreeVisitor or type(self) == TreeVisitor
203+
180204
def adapt(self, tree_type, visitor_type: Type[TV]) -> TV:
181205
# FIXME implement the visitor adapting
182206
return cast(TV, self)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from rewrite.test import RecipeSpec, python
2+
3+
4+
def test_empty():
5+
# language=python
6+
RecipeSpec().rewrite_run(python("a = []"))
7+
8+
9+
def test_trailing_comma():
10+
# language=python
11+
RecipeSpec().rewrite_run(python("a = [1, 2, ]"))
12+
13+
14+
def test_array_subscript():
15+
# language=python
16+
RecipeSpec().rewrite_run(python("a = [1, 2][0]"))
17+
18+
19+
def test_array_slice():
20+
# language=python
21+
RecipeSpec().rewrite_run(python("a = [1, 2][0:1]"))
22+
23+
24+
def test_array_slice_no_upper():
25+
# language=python
26+
RecipeSpec().rewrite_run(python("a = [1, 2][0:]"))
27+
28+
29+
def test_array_slice_all_empty():
30+
# language=python
31+
RecipeSpec().rewrite_run(python("a = [1, 2][ : : ]"))
32+
33+
34+
def test_comment():
35+
# language=python
36+
RecipeSpec().rewrite_run(python(r'''
37+
a = d[:0]
38+
a = d[0:]
39+
'''
40+
))
41+
42+
43+
def test_array_slice_empty_upper_and_step():
44+
# language=python
45+
RecipeSpec().rewrite_run(python("a = [1, 2][0::]"))
46+
47+
48+
def test_array_slice_no_lower():
49+
# language=python
50+
RecipeSpec().rewrite_run(python("a = [1, 2][:1]"))
51+
52+
53+
def test_array_slice_no_lower_no_upper():
54+
# language=python
55+
RecipeSpec().rewrite_run(python("a = [1, 2][::1]"))
56+
57+
58+
def test_array_slice_full():
59+
# language=python
60+
RecipeSpec().rewrite_run(python("a = [1, 2][0:1:1]"))
61+
62+
63+
def test_array_slice_tuple_index_1():
64+
# language=python
65+
RecipeSpec().rewrite_run(python("a = [1, 2][0,1]"))
66+
67+
68+
def test_array_slice_tuple_index_2():
69+
# language=python
70+
RecipeSpec().rewrite_run(python("a = [1, 2][(0,1)]"))

0 commit comments

Comments
 (0)