Skip to content

Commit c83110f

Browse files
committed
perf: skip syn parsing for files that don't contain the search term
Add a text pre-filter to index_crate and find_impls: before parsing a file with syn, check whether the raw text contains the item name. Files that don't match are not parsed; they get only a lightweight line scan so that mod declarations are still followed. If the fast pass finds nothing, fall back to full parsing. On k8s_openapi PodSpec (--api), index scan drops from 2.4s to 470ms (5x) and find_impls from 110ms to 56ms (2x).
1 parent da59c5b commit c83110f

File tree

1 file changed

+133
-80
lines changed

1 file changed

+133
-80
lines changed

src/index.rs

Lines changed: 133 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,42 @@ pub struct ImplBlock {
2727
pub end_line: usize,
2828
}
2929

30+
/// Options threaded through the recursive collect/index/walk functions.
31+
#[derive(Clone, Copy)]
32+
struct CollectOpts<'a> {
33+
name_filter: Option<&'a str>,
34+
pub_only: bool,
35+
out_dir: Option<&'a Path>,
36+
text_filter: Option<&'a str>,
37+
}
38+
3039
/// Build an index of items under `dirs` matching `item_name`.
40+
/// Uses a fast text pre-filter: only parses files containing `item_name`.
41+
/// Falls back to full parse if the fast pass finds nothing.
3142
pub fn index_crate(
3243
dirs: &[PathBuf],
3344
item_name: &str,
3445
pub_only: bool,
3546
out_dir: Option<&Path>,
3647
) -> Result<Vec<IndexEntry>> {
48+
let opts = CollectOpts {
49+
name_filter: Some(item_name),
50+
pub_only,
51+
out_dir,
52+
text_filter: Some(item_name),
53+
};
3754
let mut entries = Vec::new();
3855
for dir in dirs {
39-
entries.extend(collect(dir, Some(item_name), pub_only, out_dir)?);
56+
entries.extend(collect(dir, opts)?);
57+
}
58+
if entries.is_empty() {
59+
let opts = CollectOpts {
60+
text_filter: None,
61+
..opts
62+
};
63+
for dir in dirs {
64+
entries.extend(collect(dir, opts)?);
65+
}
4066
}
4167
Ok(entries)
4268
}
@@ -47,9 +73,15 @@ pub fn list_items(
4773
pub_only: bool,
4874
out_dir: Option<&Path>,
4975
) -> Result<Vec<IndexEntry>> {
76+
let opts = CollectOpts {
77+
name_filter: None,
78+
pub_only,
79+
out_dir,
80+
text_filter: None,
81+
};
5082
let mut entries = Vec::new();
5183
for dir in dirs {
52-
entries.extend(collect(dir, None, pub_only, out_dir)?);
84+
entries.extend(collect(dir, opts)?);
5385
}
5486
Ok(entries)
5587
}
@@ -76,6 +108,9 @@ pub fn find_impls(
76108
fn find_impls_in_file(path: &Path, type_name: &str) -> Result<Vec<ImplBlock>> {
77109
let source =
78110
fs::read_to_string(path).with_context(|| format!("failed to read {}", path.display()))?;
111+
if !source.contains(type_name) {
112+
return Ok(vec![]);
113+
}
79114
let Ok(file) = syn::parse_file(&source) else {
80115
return Ok(vec![]);
81116
};
@@ -114,12 +149,7 @@ fn impl_is_for(imp: &syn::ItemImpl, type_name: &str) -> bool {
114149
}
115150
}
116151

117-
fn collect(
118-
src_dir: &Path,
119-
name_filter: Option<&str>,
120-
pub_only: bool,
121-
out_dir: Option<&Path>,
122-
) -> Result<Vec<IndexEntry>> {
152+
fn collect(src_dir: &Path, opts: CollectOpts) -> Result<Vec<IndexEntry>> {
123153
let mut entries = Vec::new();
124154
let mut reexports = Vec::new();
125155
let mut visited = HashSet::new();
@@ -131,30 +161,13 @@ fn collect(
131161
} else if main_rs.exists() {
132162
main_rs
133163
} else {
134-
walk_all_rs(
135-
src_dir,
136-
name_filter,
137-
pub_only,
138-
out_dir,
139-
&mut entries,
140-
&mut reexports,
141-
&mut visited,
142-
)?;
143-
resolve_reexports(&mut entries, &reexports, name_filter);
164+
walk_all_rs(src_dir, opts, &mut entries, &mut reexports, &mut visited)?;
165+
resolve_reexports(&mut entries, &reexports, opts.name_filter);
144166
return Ok(entries);
145167
};
146168

147-
index_file(
148-
&entry,
149-
"",
150-
name_filter,
151-
pub_only,
152-
out_dir,
153-
&mut entries,
154-
&mut reexports,
155-
&mut visited,
156-
)?;
157-
resolve_reexports(&mut entries, &reexports, name_filter);
169+
index_file(&entry, "", opts, &mut entries, &mut reexports, &mut visited)?;
170+
resolve_reexports(&mut entries, &reexports, opts.name_filter);
158171
Ok(entries)
159172
}
160173

@@ -221,13 +234,10 @@ fn reexport_path_matches(entry_module_path: &str, source_segments: &[String]) ->
221234
entry_module_path == expected || entry_module_path.ends_with(&format!("::{expected}"))
222235
}
223236

224-
#[allow(clippy::too_many_arguments)]
225237
fn index_file(
226238
path: &Path,
227239
module_path: &str,
228-
name_filter: Option<&str>,
229-
pub_only: bool,
230-
out_dir: Option<&Path>,
240+
opts: CollectOpts,
231241
entries: &mut Vec<IndexEntry>,
232242
reexports: &mut Vec<ReExport>,
233243
visited: &mut HashSet<PathBuf>,
@@ -239,17 +249,59 @@ fn index_file(
239249

240250
let source =
241251
fs::read_to_string(path).with_context(|| format!("failed to read {}", path.display()))?;
252+
253+
// When text_filter is set and the file doesn't contain the text,
254+
// skip the expensive parse+visit but still follow mod declarations
255+
// by scanning for `mod <name>;` lines.
256+
if opts.text_filter.is_some_and(|tf| !source.contains(tf)) {
257+
let parent_dir = path.parent().unwrap();
258+
for line in source.lines() {
259+
let trimmed = line.trim();
260+
let rest = trimmed
261+
.strip_prefix("pub")
262+
.and_then(|s| {
263+
if s.starts_with(' ') {
264+
Some(s.trim_start())
265+
} else if s.starts_with('(') {
266+
s.find(')').map(|i| s[i + 1..].trim_start())
267+
} else {
268+
None
269+
}
270+
})
271+
.unwrap_or(trimmed);
272+
let Some(rest) = rest.strip_prefix("mod ") else {
273+
continue;
274+
};
275+
let Some(mod_name) = rest.strip_suffix(';') else {
276+
continue;
277+
};
278+
let mod_name = mod_name.trim();
279+
if mod_name.is_empty() || mod_name.contains(' ') {
280+
continue;
281+
}
282+
if let Some(child) = resolve_mod_file(parent_dir, mod_name) {
283+
let child_mod = if module_path.is_empty() {
284+
mod_name.to_string()
285+
} else {
286+
format!("{module_path}::{mod_name}")
287+
};
288+
index_file(&child, &child_mod, opts, entries, reexports, visited)?;
289+
}
290+
}
291+
return Ok(());
292+
}
293+
242294
let file = syn::parse_file(&source).ok();
243295
let Some(file) = file else {
244296
return Ok(());
245297
};
246298

247299
let mut visitor = ItemVisitor {
248-
name_filter,
249-
pub_only,
300+
name_filter: opts.name_filter,
301+
pub_only: opts.pub_only,
250302
module_path: module_path.to_string(),
251303
file_path: path.to_path_buf(),
252-
out_dir: out_dir.map(Path::to_path_buf),
304+
out_dir: opts.out_dir.map(Path::to_path_buf),
253305
entries,
254306
included_files: Vec::new(),
255307
};
@@ -258,16 +310,7 @@ fn index_file(
258310

259311
// Index any files discovered via include!() macros
260312
for (inc_path, inc_module) in included_files {
261-
index_file(
262-
&inc_path,
263-
&inc_module,
264-
name_filter,
265-
pub_only,
266-
out_dir,
267-
entries,
268-
reexports,
269-
visited,
270-
)?;
313+
index_file(&inc_path, &inc_module, opts, entries, reexports, visited)?;
271314
}
272315

273316
// Collect `pub use` re-exports and follow `mod` declarations
@@ -286,16 +329,7 @@ fn index_file(
286329
} else {
287330
format!("{module_path}::{mod_name}")
288331
};
289-
index_file(
290-
&child,
291-
&child_mod,
292-
name_filter,
293-
pub_only,
294-
out_dir,
295-
entries,
296-
reexports,
297-
visited,
298-
)?;
332+
index_file(&child, &child_mod, opts, entries, reexports, visited)?;
299333
}
300334
}
301335
_ => {}
@@ -387,9 +421,7 @@ fn gather_rs_files_walk(dir: &Path, files: &mut Vec<PathBuf>) {
387421

388422
fn walk_all_rs(
389423
dir: &Path,
390-
name_filter: Option<&str>,
391-
pub_only: bool,
392-
out_dir: Option<&Path>,
424+
opts: CollectOpts,
393425
entries: &mut Vec<IndexEntry>,
394426
reexports: &mut Vec<ReExport>,
395427
visited: &mut HashSet<PathBuf>,
@@ -398,26 +430,9 @@ fn walk_all_rs(
398430
let entry = entry?;
399431
let path = entry.path();
400432
if path.is_dir() {
401-
walk_all_rs(
402-
&path,
403-
name_filter,
404-
pub_only,
405-
out_dir,
406-
entries,
407-
reexports,
408-
visited,
409-
)?;
433+
walk_all_rs(&path, opts, entries, reexports, visited)?;
410434
} else if path.extension().is_some_and(|e| e == "rs") {
411-
index_file(
412-
&path,
413-
"",
414-
name_filter,
415-
pub_only,
416-
out_dir,
417-
entries,
418-
reexports,
419-
visited,
420-
)?;
435+
index_file(&path, "", opts, entries, reexports, visited)?;
421436
}
422437
}
423438
Ok(())
@@ -665,7 +680,16 @@ mod tests {
665680
let lib_rs = dir.path().join("lib.rs");
666681
let mut f = fs::File::create(&lib_rs).unwrap();
667682
f.write_all(source.as_bytes()).unwrap();
668-
collect(dir.path(), None, false, None).unwrap()
683+
collect(
684+
dir.path(),
685+
CollectOpts {
686+
name_filter: None,
687+
pub_only: false,
688+
out_dir: None,
689+
text_filter: None,
690+
},
691+
)
692+
.unwrap()
669693
}
670694

671695
#[test]
@@ -709,6 +733,26 @@ mod tests {
709733
assert_eq!(entries[0].module_path, "");
710734
}
711735

736+
#[test]
737+
fn text_filter_follows_pub_crate_mod() {
738+
// Foo exists in both `a.rs` (via `pub mod a`) and `b.rs` (via
739+
// `pub(crate) mod b`). The fast path finds the first Foo so the
740+
// fallback never triggers — if the text scanner can't follow
741+
// `pub(crate) mod`, the second Foo is silently lost.
742+
let dir = tempfile::tempdir().unwrap();
743+
let src = dir.path().join("src");
744+
fs::create_dir_all(&src).unwrap();
745+
fs::write(src.join("lib.rs"), "pub mod a;\npub(crate) mod b;").unwrap();
746+
fs::write(src.join("a.rs"), "pub struct Foo;").unwrap();
747+
fs::write(src.join("b.rs"), "pub struct Foo;").unwrap();
748+
let entries = index_crate(&[src], "Foo", false, None).unwrap();
749+
assert_eq!(
750+
entries.len(),
751+
2,
752+
"expected Foo from both modules, got: {entries:?}"
753+
);
754+
}
755+
712756
#[test]
713757
fn pub_only_filters_private_const() {
714758
let dir = tempfile::tempdir().unwrap();
@@ -718,7 +762,16 @@ mod tests {
718762
"const PRIVATE: u32 = 1;\npub const PUBLIC: u32 = 2;",
719763
)
720764
.unwrap();
721-
let entries = collect(dir.path(), None, true, None).unwrap();
765+
let entries = collect(
766+
dir.path(),
767+
CollectOpts {
768+
name_filter: None,
769+
pub_only: true,
770+
out_dir: None,
771+
text_filter: None,
772+
},
773+
)
774+
.unwrap();
722775
assert_eq!(entries.len(), 1);
723776
assert_eq!(entries[0].name, "PUBLIC");
724777
}

0 commit comments

Comments
 (0)