1
1
// Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
2
2
// Licensed under the Apache License, Version 2.0 (see LICENSE).
3
+ use std:: borrow:: Cow ;
3
4
use std:: path:: PathBuf ;
4
5
5
6
use fnv:: { FnvHashMap as HashMap , FnvHashSet as HashSet } ;
@@ -76,6 +77,12 @@ struct ImportCollector<'a> {
76
77
weaken_imports : bool ,
77
78
}
78
79
80
+ /// Input to `insert_import` to specify the base of an import statement.
81
+ enum BaseNode < ' a > {
82
+ Node ( tree_sitter:: Node < ' a > ) ,
83
+ StringNode ( String , tree_sitter:: Node < ' a > ) ,
84
+ }
85
+
79
86
impl ImportCollector < ' _ > {
80
87
pub fn new ( code : & ' _ str ) -> ImportCollector < ' _ > {
81
88
ImportCollector {
@@ -102,11 +109,46 @@ impl ImportCollector<'_> {
102
109
code:: at_range ( self . code , range)
103
110
}
104
111
105
- fn string_at ( & self , range : tree_sitter:: Range ) -> & str {
106
- // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
107
- self . code_at ( range)
108
- . trim_start_matches ( |c| "rRuUfFbB" . contains ( c) )
109
- . trim_matches ( |c| "'\" " . contains ( c) )
112
+ /// Extract an optional "import name" string from a string-related `tree_sitter::Node`. The string is
113
+ /// expected to analyze statically as an import name and so `extract_string` ignores any strings
114
+ /// containing escape sequences as well as any use of f-strings with interpolations.
115
+ fn extract_string ( & self , node : tree_sitter:: Node ) -> Option < String > {
116
+ match node. kind_id ( ) {
117
+ KindID :: STRING => {
118
+ if node. child_count ( ) != 3 {
119
+ // String literals are expected to have exactly three children consisting of `string_start`, `string_content`,
120
+ // and `string_end`. If there are more children, then it means that `interpolation` nodes are present, which
121
+ // means that the string is an f-string and not an import name that may be analyzed statically.
122
+ return None ;
123
+ }
124
+
125
+ let content_node = node. child ( 1 ) ?;
126
+ if content_node. kind_id ( ) != KindID :: STRING_CONTENT
127
+ || content_node. child_count ( ) > 0
128
+ {
129
+ // The `string_content` node is expected to have no children if the string is a simple string literal.
130
+ // If there are children, then it means that the string contains escape sequences and is not an import name
131
+ // (either an `escape_sequence` or `escape_interpolation` node).
132
+ return None ;
133
+ }
134
+
135
+ let content = code:: at_range ( self . code , content_node. range ( ) ) ;
136
+ Some ( content. to_string ( ) )
137
+ }
138
+ KindID :: CONCATENATED_STRING => {
139
+ let substrings: Vec < Option < String > > = node
140
+ . children ( & mut node. walk ( ) )
141
+ . filter ( |n| n. kind_id ( ) != KindID :: COMMENT )
142
+ . map ( |n| self . extract_string ( n) )
143
+ . collect ( ) ;
144
+ if substrings. iter ( ) . any ( |s| s. is_none ( ) ) {
145
+ return None ;
146
+ }
147
+ let substrings: Vec < String > = substrings. into_iter ( ) . flatten ( ) . collect ( ) ;
148
+ Some ( substrings. join ( "" ) )
149
+ }
150
+ _ => None ,
151
+ }
110
152
}
111
153
112
154
fn is_pragma_ignored_at_row ( & self , node : tree_sitter:: Node , end_row : usize ) -> bool {
@@ -170,36 +212,34 @@ impl ImportCollector<'_> {
170
212
/// from $base import * # (the * node is passed as `specific` too)
171
213
/// from $base import $specific
172
214
/// ```
173
- fn insert_import (
174
- & mut self ,
175
- base : tree_sitter:: Node ,
176
- specific : Option < tree_sitter:: Node > ,
177
- is_string : bool ,
178
- ) {
215
+ fn insert_import ( & mut self , base : BaseNode , specific : Option < tree_sitter:: Node > ) {
179
216
// the specifically-imported item takes precedence over the base name for ignoring and lines
180
217
// etc.
181
- let most_specific = specific. unwrap_or ( base) ;
218
+ let most_specific = match base {
219
+ BaseNode :: Node ( n) => specific. unwrap_or ( n) ,
220
+ BaseNode :: StringNode ( _, n) => specific. unwrap_or ( n) ,
221
+ } ;
182
222
183
223
if self . is_pragma_ignored ( most_specific) {
184
224
return ;
185
225
}
186
226
187
- let base = ImportCollector :: unnest_alias ( base) ;
227
+ let base_ref = match base {
228
+ BaseNode :: Node ( node) => {
229
+ let node = Self :: unnest_alias ( node) ;
230
+ Cow :: Borrowed ( self . code_at ( node. range ( ) ) )
231
+ }
232
+ BaseNode :: StringNode ( s, _) => Cow :: Owned ( s) ,
233
+ } ;
234
+
188
235
// * and errors are the same as not having a specific import
189
236
let specific = specific
190
237
. map ( ImportCollector :: unnest_alias)
191
238
. filter ( |n| !matches ! ( n. kind_id( ) , KindID :: WILDCARD_IMPORT | KindID :: ERROR ) ) ;
192
239
193
- let base_range = base. range ( ) ;
194
- let base_ref = if is_string {
195
- self . string_at ( base_range)
196
- } else {
197
- self . code_at ( base_range)
198
- } ;
199
-
200
240
let full_name = match specific {
201
241
Some ( specific) => {
202
- let specific_ref = self . code_at ( specific. range ( ) ) ;
242
+ let specific_ref = Cow :: Borrowed ( self . code_at ( specific. range ( ) ) ) ;
203
243
// `from ... import a` => `...a` should concat base_ref and specific_ref directly, but `from
204
244
// x import a` => `x.a` needs to insert a . between them
205
245
let joiner = if base_ref. ends_with ( '.' ) { "" } else { "." } ;
@@ -215,13 +255,24 @@ impl ImportCollector<'_> {
215
255
. and_modify ( |v| * v = ( v. 0 , v. 1 && self . weaken_imports ) )
216
256
. or_insert ( ( ( line0 as u64 ) + 1 , self . weaken_imports ) ) ;
217
257
}
258
+
259
+ fn handle_string_candidate ( & mut self , node : tree_sitter:: Node ) {
260
+ if let Some ( text) = self . extract_string ( node) {
261
+ if !text. contains ( |c : char | c. is_ascii_whitespace ( ) || c == '\\' )
262
+ && !self . is_pragma_ignored_recursive ( node)
263
+ {
264
+ self . string_candidates
265
+ . insert ( text, ( node. range ( ) . start_point . row + 1 ) as u64 ) ;
266
+ }
267
+ }
268
+ }
218
269
}
219
270
220
271
// NB: https://tree-sitter.github.io/tree-sitter/playground is very helpful
221
272
impl Visitor for ImportCollector < ' _ > {
222
273
fn visit_import_statement ( & mut self , node : tree_sitter:: Node ) -> ChildBehavior {
223
274
if !self . is_pragma_ignored ( node) {
224
- self . insert_import ( node. named_child ( 0 ) . unwrap ( ) , None , false ) ;
275
+ self . insert_import ( BaseNode :: Node ( node. named_child ( 0 ) . unwrap ( ) ) , None ) ;
225
276
}
226
277
ChildBehavior :: Ignore
227
278
}
@@ -236,7 +287,7 @@ impl Visitor for ImportCollector<'_> {
236
287
237
288
let mut any_inserted = false ;
238
289
for child in node. children_by_field_name ( "name" , & mut node. walk ( ) ) {
239
- self . insert_import ( module_name, Some ( child) , false ) ;
290
+ self . insert_import ( BaseNode :: Node ( module_name) , Some ( child) ) ;
240
291
any_inserted = true ;
241
292
}
242
293
@@ -246,7 +297,7 @@ impl Visitor for ImportCollector<'_> {
246
297
// manually.)
247
298
for child in node. children ( & mut node. walk ( ) ) {
248
299
if child. kind_id ( ) == KindID :: WILDCARD_IMPORT {
249
- self . insert_import ( module_name, Some ( child) , false ) ;
300
+ self . insert_import ( BaseNode :: Node ( module_name) , Some ( child) ) ;
250
301
any_inserted = true
251
302
}
252
303
}
@@ -257,7 +308,7 @@ impl Visitor for ImportCollector<'_> {
257
308
// understood the syntax tree! We're working on a definite import statement, so silently
258
309
// doing nothing with it is likely to be wrong. Let's insert the import node itself and let
259
310
// that be surfaced as a dep-inference failure.
260
- self . insert_import ( node, None , false )
311
+ self . insert_import ( BaseNode :: Node ( node) , None )
261
312
}
262
313
}
263
314
ChildBehavior :: Ignore
@@ -340,25 +391,24 @@ impl Visitor for ImportCollector<'_> {
340
391
341
392
let args = node. named_child ( 1 ) . unwrap ( ) ;
342
393
if let Some ( arg) = args. named_child ( 0 ) {
343
- if arg. kind_id ( ) == KindID :: STRING {
344
- // NB: Call nodes are children of expression nodes. The comment is a sibling of the expression.
394
+ if let Some ( content) = self . extract_string ( arg) {
345
395
if !self . is_pragma_ignored ( node. parent ( ) . unwrap ( ) ) {
346
- self . insert_import ( arg , None , true ) ;
396
+ self . insert_import ( BaseNode :: StringNode ( content , arg ) , None ) ;
347
397
}
348
398
}
349
399
}
400
+
401
+ // Do not descend below the `__import__` call statement.
402
+ ChildBehavior :: Ignore
403
+ }
404
+
405
+ fn visit_concatenated_string ( & mut self , node : tree_sitter:: Node ) -> ChildBehavior {
406
+ self . handle_string_candidate ( node) ;
350
407
ChildBehavior :: Ignore
351
408
}
352
409
353
410
fn visit_string ( & mut self , node : tree_sitter:: Node ) -> ChildBehavior {
354
- let range = node. range ( ) ;
355
- let text: & str = self . string_at ( range) ;
356
- if !text. contains ( |c : char | c. is_ascii_whitespace ( ) || c == '\\' )
357
- && !self . is_pragma_ignored_recursive ( node)
358
- {
359
- self . string_candidates
360
- . insert ( text. to_string ( ) , ( range. start_point . row + 1 ) as u64 ) ;
361
- }
411
+ self . handle_string_candidate ( node) ;
362
412
ChildBehavior :: Ignore
363
413
}
364
414
}
0 commit comments