diff --git a/Cargo.lock b/Cargo.lock index 5c1fd086cce1..58e7d4dc1bb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -140,7 +140,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -151,7 +151,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -534,9 +534,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.12" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cba48474f1d6807384d06fec085b909f5807e16653c5af5c45dfe89539f0b70" +checksum = "52eec3db979d18cb807fc1070961cc51d87d069abe9ab57917769687368a8c6c" dependencies = [ "futures-util", "pin-project-lite", @@ -703,9 +703,9 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.14" +version = "0.60.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa" +checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" dependencies = [ "xmlparser", ] @@ -1956,7 +1956,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -3125,7 +3125,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -3417,7 +3417,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -4323,6 +4323,7 @@ dependencies = [ "posthog-rs", "pulldown-cmark", "rand 0.8.5", + "rayon", "regex", "reqwest 0.13.2", "rmcp 0.16.0", @@ -4350,6 +4351,16 @@ dependencies = [ "tracing", "tracing-opentelemetry", "tracing-subscriber", + "tree-sitter", + "tree-sitter-go", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-kotlin-ng", + "tree-sitter-python", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-swift", + "tree-sitter-typescript", "unbinder", "unicode-normalization", "url", @@ -4996,7 +5007,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.2", + "socket2 0.5.10", "system-configuration 0.7.0", "tokio", "tower-service", @@ -5463,7 +5474,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -6218,7 +6229,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -7654,7 +7665,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.1", "rustls 0.23.36", - "socket2 0.6.2", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tracing", @@ -7692,9 +7703,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.2", + "socket2 0.5.10", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.52.0", ] [[package]] @@ -8415,7 +8426,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -8495,7 +8506,7 @@ dependencies = [ "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -10281,7 +10292,7 @@ dependencies = [ "getrandom 0.4.1", "once_cell", "rustix 1.1.3", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -11083,6 +11094,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "triomphe" version = "0.1.15" diff --git a/Cargo.toml b/Cargo.toml index 0db56f2cdec4..c571412c98e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,17 @@ opentelemetry-appender-tracing = "0.31" opentelemetry-stdout = { version = "0.31", features = ["trace", "metrics", "logs"] } tracing-opentelemetry = "0.32" +rayon = "1.10" +tree-sitter = "0.26" +tree-sitter-go = "0.25" +tree-sitter-java = "0.23" +tree-sitter-javascript = "0.25" +tree-sitter-kotlin-ng = "1.1" +tree-sitter-python = "0.25" +tree-sitter-ruby = "0.23" +tree-sitter-rust = "0.24" +tree-sitter-swift = "0.7" +tree-sitter-typescript = "0.23" [patch.crates-io] v8 = { path = "vendor/v8" } diff --git a/crates/goose/Cargo.toml b/crates/goose/Cargo.toml index 21f9a9c220e7..31b4018c10e0 100644 --- a/crates/goose/Cargo.toml +++ b/crates/goose/Cargo.toml @@ -123,6 +123,17 @@ posthog-rs = "0.3.7" shellexpand = { workspace = true } indexmap = "2.12.0" ignore = { workspace = true } +rayon = { workspace = true } +tree-sitter = { workspace = true } +tree-sitter-go = { workspace = true } +tree-sitter-java = { workspace = true } +tree-sitter-javascript = { workspace = true } +tree-sitter-kotlin-ng = { workspace = true } +tree-sitter-python = { workspace = true } +tree-sitter-ruby = { workspace = true } +tree-sitter-rust = { workspace = true } +tree-sitter-swift = { workspace = true } +tree-sitter-typescript = { workspace = true } which = { workspace = true } pctx_code_mode = { version = "^0.2.3", optional = true } unbinder = "0.1.7" @@ -161,6 +172,10 @@ name = "databricks_oauth" path = "examples/databricks_oauth.rs" +[[bin]] +name = "analyze_cli" +path = "src/bin/analyze_cli.rs" + [[bin]] name = "build_canonical_models" path = "src/providers/canonical/build_canonical_models.rs" diff --git a/crates/goose/src/agents/platform_extensions/analyze/format.rs b/crates/goose/src/agents/platform_extensions/analyze/format.rs new file mode 100644 index 000000000000..135f64f58e24 --- /dev/null +++ b/crates/goose/src/agents/platform_extensions/analyze/format.rs @@ -0,0 +1,458 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::Write; +use std::path::Path; + +use super::graph::CallGraph; +use super::parser::FileAnalysis; + +const SIZE_LIMIT: usize = 50_000; +const MULTILINE_THRESHOLD: usize = 10; + +pub fn format_structure( + analyses: &[FileAnalysis], + root: &Path, + depth: u32, + total_files: usize, +) -> String { + let mut out = String::new(); + + let total_loc: usize = analyses.iter().map(|a| a.loc).sum(); + let total_funcs: usize = analyses.iter().map(|a| a.functions.len()).sum(); + let total_classes: usize = analyses.iter().map(|a| a.classes.len()).sum(); + + let depth_str = if depth == 0 { + "unlimited".to_string() + } else { + format!("depth={}", depth) + }; + let _ = writeln!( + out, + "{} files, {}L, {}F, {}C ({})", + analyses.len(), + total_loc, + total_funcs, + total_classes, + depth_str + ); + + let skipped = total_files.saturating_sub(analyses.len()); + if skipped > 0 { + let _ = writeln!(out, "({} files skipped: no parser)", skipped); + } + + let mut lang_loc: HashMap<&str, usize> = HashMap::new(); + for a in analyses { + if !a.language.is_empty() && a.loc > 0 { + *lang_loc.entry(a.language).or_default() += a.loc; + } + } + if !lang_loc.is_empty() && total_loc > 0 { + let mut langs: Vec<_> = lang_loc.into_iter().collect(); + langs.sort_by(|a, b| b.1.cmp(&a.1)); + let parts: Vec = langs + .iter() + .map(|(lang, loc)| { + let pct = (*loc as f64 / total_loc as f64 * 100.0) as u32; + format!("{} {}%", lang, pct) + }) + .collect(); + let _ = writeln!(out, "{}", parts.join(" | ")); + } + out.push('\n'); + + let tree = build_dir_tree(analyses, root); + render_tree(&mut out, &tree, 0); + + out +} + +pub fn format_semantic(analysis: &FileAnalysis, root: &Path) -> String { + let mut out = String::new(); + + let display_path = analysis.path.strip_prefix(root).unwrap_or(&analysis.path); + + let _ = write!( + out, + "{} [{}L, {}F", + display_path.display(), + analysis.loc, + analysis.functions.len() + ); + if !analysis.classes.is_empty() { + let _ = write!(out, ", {}C", analysis.classes.len()); + } + out.push_str("]\n\n"); + + if !analysis.classes.is_empty() { + let items: Vec = analysis + .classes + .iter() + .map(|c| { + let detail = c.detail.as_deref().unwrap_or(""); + if detail.is_empty() { + format!("{}:{}", c.name, c.line) + } else { + format!("{}:{}{}", c.name, c.line, detail) + } + }) + .collect(); + format_symbol_list(&mut out, "C:", &items); + } + + if !analysis.functions.is_empty() { + let mut call_counts: HashMap<&str, usize> = HashMap::new(); + for call in &analysis.calls { + let bare = call.callee.rsplit("::").next().unwrap_or(&call.callee); + *call_counts.entry(bare).or_default() += 1; + } + + let items: Vec = analysis + .functions + .iter() + .map(|f| { + let mut label = String::new(); + if let Some(ref parent) = f.parent { + label.push_str(parent); + label.push('.'); + } + label.push_str(&f.name); + if let Some(ref detail) = f.detail { + label.push_str(detail); + } + let count = call_counts.get(f.name.as_str()).copied().unwrap_or(0); + if count > 3 { + format!("{}:{}•{}", label, f.line, count) + } else { + format!("{}:{}", label, f.line) + } + }) + .collect(); + format_symbol_list(&mut out, "F:", &items); + } + + if !analysis.imports.is_empty() { + out.push_str("I: "); + let items: Vec = analysis + .imports + .iter() + .map(|i| { + if i.count > 1 { + format!("{}({})", i.module, i.count) + } else { + i.module.clone() + } + }) + .collect(); + out.push_str(&items.join("; ")); + out.push('\n'); + } + + out +} + +fn format_symbol_list(out: &mut String, prefix: &str, items: &[String]) { + if items.len() > MULTILINE_THRESHOLD { + let _ = writeln!(out, "{}", prefix); + for item in items { + let _ = writeln!(out, " {}", item); + } + } else { + let _ = write!(out, "{} ", prefix); + out.push_str(&items.join(" ")); + out.push('\n'); + } +} + +pub fn format_focused( + symbol: &str, + graph: &CallGraph, + follow_depth: u32, + files_analyzed: usize, + root: &Path, +) -> String { + let defs = graph.definitions(symbol); + + // Always count direct neighbors at depth=1, independent of follow_depth, + // so the ref count is accurate even when follow_depth=0. + let depth1_in = graph.incoming(symbol, 1); + let depth1_out = graph.outgoing(symbol, 1); + + if defs.is_empty() && depth1_in.is_empty() && depth1_out.is_empty() { + return format!( + "Symbol '{}' not found in {} analyzed files.\n", + symbol, files_analyzed + ); + } + + let incoming = graph.incoming(symbol, follow_depth); + let outgoing = graph.outgoing(symbol, follow_depth); + + let mut out = String::new(); + + let direct_callers: HashSet<_> = depth1_in + .iter() + .filter_map(|chain| chain.get(1)) + .map(|link| (&link.file, &link.name, link.line)) + .collect(); + + let direct_callees: HashSet<_> = depth1_out + .iter() + .filter_map(|chain| chain.get(1)) + .map(|link| (&link.file, &link.name, link.line)) + .collect(); + + let ref_count = direct_callers.len() + direct_callees.len(); + let _ = writeln!( + out, + "FOCUS: {} ({} defs, {} refs)\n", + symbol, + defs.len(), + ref_count + ); + + for d in &defs { + let display = d.file.strip_prefix(root).unwrap_or(&d.file); + let _ = writeln!(out, "DEF {}:{}:{}", display.display(), d.name, d.line); + } + if !defs.is_empty() { + out.push('\n'); + } + + let (in_prod, in_test) = partition_test_chains(&incoming); + format_chain_group(&mut out, "IN", &in_prod, root); + format_chain_group(&mut out, "IN (tests)", &in_test, root); + + let (out_prod, out_test) = partition_test_chains(&outgoing); + format_chain_group(&mut out, "OUT", &out_prod, root); + format_chain_group(&mut out, "OUT (tests)", &out_test, root); + + let _ = writeln!(out, "{} files analyzed", files_analyzed); + + out +} + +type Chain = Vec; + +fn format_chain_link(link: &super::graph::ChainLink, root: &Path) -> String { + let display = link.file.strip_prefix(root).unwrap_or(&link.file); + format!("{}:{}:{}", display.display(), link.name, link.line) +} + +fn format_chain_group(out: &mut String, label: &str, chains: &[Chain], root: &Path) { + if chains.is_empty() { + return; + } + + let mut formatted: Vec> = chains + .iter() + .map(|chain| { + chain + .iter() + .map(|link| format_chain_link(link, root)) + .collect() + }) + .collect(); + formatted.sort(); + + let _ = writeln!(out, "{}:", label); + let mut i = 0; + while i < formatted.len() { + let chain = &formatted[i]; + let mut group_end = i + 1; + if chain.len() >= 2 { + let prefix = &chain[..chain.len() - 1]; + while group_end < formatted.len() { + let next = &formatted[group_end]; + if next.len() >= 2 && next[..next.len() - 1] == *prefix { + group_end += 1; + } else { + break; + } + } + } + if group_end - i > 1 { + let prefix = &chain[..chain.len() - 1]; + let _ = writeln!(out, " {}", prefix.join(" → ")); + for entry in &formatted[i..group_end] { + if let Some(tail) = entry.last() { + let _ = writeln!(out, " → {}", tail); + } + } + } else { + let _ = writeln!(out, " {}", chain.join(" → ")); + } + i = group_end; + } + out.push('\n'); +} + +fn is_test_chain(chain: &[super::graph::ChainLink]) -> bool { + chain.iter().any(|link| { + if link.name.starts_with("test_") || link.name.contains("_test") { + return true; + } + let f = link.file.to_string_lossy(); + // Rust / Python + f.ends_with("_test.rs") + || f.ends_with("_test.py") + // JavaScript / TypeScript + || f.ends_with(".test.ts") + || f.ends_with(".test.js") + || f.ends_with(".test.tsx") + || f.ends_with(".test.jsx") + // Go + || f.ends_with("_test.go") + // Java + || f.ends_with("Test.java") + || f.ends_with("Tests.java") + // Kotlin + || f.ends_with("Test.kt") + // Ruby (RSpec + Minitest) + || f.ends_with("_spec.rb") + || f.ends_with("_test.rb") + // Swift + || f.ends_with("Test.swift") + || f.ends_with("Tests.swift") + // Directory conventions + || f.contains("/tests/") + || f.contains("/test/") + || f.contains("/src/test/") // Java/Kotlin (Maven/Gradle) + || f.contains("/spec/") // Ruby (RSpec) + || f.contains("/Tests/") // Swift Package Manager + }) +} + +fn partition_test_chains(chains: &[Chain]) -> (Vec, Vec) { + let mut prod = Vec::new(); + let mut test = Vec::new(); + for chain in chains { + if is_test_chain(chain) { + test.push(chain.clone()); + } else { + prod.push(chain.clone()); + } + } + (prod, test) +} + +pub fn check_size(output: &str, force: bool) -> Result { + if output.len() > SIZE_LIMIT && !force { + Err(format!( + "Output too large ({} chars, limit {}). Use `force: true` to override, or narrow scope with max_depth/focus.", + output.len(), + SIZE_LIMIT + )) + } else { + Ok(output.to_string()) + } +} + +// --- Tree building internals --- + +enum TreeNode { + Dir { + name: String, + children: Vec, + }, + File { + name: String, + loc: usize, + funcs: usize, + classes: usize, + }, +} + +fn build_dir_tree(analyses: &[FileAnalysis], root: &Path) -> Vec { + let mut entries: Vec<(Vec, &FileAnalysis)> = analyses + .iter() + .filter_map(|a| { + let rel = a.path.strip_prefix(root).ok()?; + let parts: Vec = rel + .components() + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .collect(); + Some((parts, a)) + }) + .collect(); + entries.sort_by(|a, b| a.0.cmp(&b.0)); + + build_subtree(&entries, 0) +} + +fn build_subtree(entries: &[(Vec, &FileAnalysis)], depth: usize) -> Vec { + let mut nodes: Vec = Vec::new(); + let mut i = 0; + + while i < entries.len() { + let (parts, analysis) = &entries[i]; + if depth >= parts.len() { + i += 1; + continue; + } + + let name = &parts[depth]; + + if depth + 1 == parts.len() { + nodes.push(TreeNode::File { + name: name.clone(), + loc: analysis.loc, + funcs: analysis.functions.len(), + classes: analysis.classes.len(), + }); + i += 1; + } else { + let mut j = i + 1; + while j < entries.len() && entries[j].0.len() > depth && entries[j].0[depth] == *name { + j += 1; + } + let children = build_subtree(&entries[i..j], depth + 1); + nodes.push(TreeNode::Dir { + name: name.clone(), + children, + }); + i = j; + } + } + + nodes.sort_by(|a, b| { + let a_is_dir = matches!(a, TreeNode::Dir { .. }); + let b_is_dir = matches!(b, TreeNode::Dir { .. }); + match (a_is_dir, b_is_dir) { + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, + _ => node_name(a).cmp(node_name(b)), + } + }); + + nodes +} + +fn node_name(node: &TreeNode) -> &str { + match node { + TreeNode::Dir { name, .. } | TreeNode::File { name, .. } => name, + } +} + +fn render_tree(out: &mut String, nodes: &[TreeNode], indent: usize) { + let prefix = " ".repeat(indent); + for node in nodes { + match node { + TreeNode::Dir { name, children } => { + let _ = writeln!(out, "{}{}/", prefix, name); + render_tree(out, children, indent + 1); + } + TreeNode::File { + name, + loc, + funcs, + classes, + } => { + let _ = write!(out, "{}{} [{}L, {}F", prefix, name, loc, funcs); + if *classes > 0 { + let _ = write!(out, ", {}C", classes); + } + out.push_str("]\n"); + } + } + } +} diff --git a/crates/goose/src/agents/platform_extensions/analyze/graph.rs b/crates/goose/src/agents/platform_extensions/analyze/graph.rs new file mode 100644 index 000000000000..e6c891a41f9c --- /dev/null +++ b/crates/goose/src/agents/platform_extensions/analyze/graph.rs @@ -0,0 +1,274 @@ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::path::PathBuf; + +use super::parser::{Call, FileAnalysis, Symbol}; + +/// (file_path, symbol_name, definition_line) — line disambiguates same-name +/// functions in the same file (e.g. two `process()` in different impl blocks). +type NodeKey = (PathBuf, String, usize); + +#[derive(Clone)] +pub struct ChainLink { + pub file: PathBuf, + pub name: String, + pub line: usize, +} + +struct Node { + file: PathBuf, + name: String, + line: usize, +} + +pub struct CallGraph { + nodes: HashMap, + // callee_key → set of caller_keys + incoming: HashMap>, + // caller_key → set of callee_keys + outgoing: HashMap>, +} + +impl CallGraph { + pub fn build(analyses: &[FileAnalysis]) -> Self { + let mut nodes: HashMap = HashMap::new(); + let mut incoming: HashMap> = HashMap::new(); + let mut outgoing: HashMap> = HashMap::new(); + + for a in analyses { + let register = |sym: &Symbol, nodes: &mut HashMap| { + let key = (a.path.clone(), sym.name.clone(), sym.line); + nodes.entry(key).or_insert_with(|| Node { + file: a.path.clone(), + name: sym.name.clone(), + line: sym.line, + }); + }; + for f in &a.functions { + register(f, &mut nodes); + } + for c in &a.classes { + register(c, &mut nodes); + } + } + + // Register pseudo-nodes so top-level calls have a caller key + for a in analyses { + let module_key = (a.path.clone(), "".to_string(), 0usize); + nodes.entry(module_key).or_insert_with(|| Node { + file: a.path.clone(), + name: "".to_string(), + line: 0, + }); + } + + // Build a name → keys index for resolving cross-file calls + let mut name_index: HashMap<&str, Vec> = HashMap::new(); + for key in nodes.keys() { + name_index.entry(&key.1).or_default().push(key.clone()); + } + + // Build (path, name) → sorted definition lines for caller resolution. + // When a Call says caller="process" at line 50, we pick the definition + // of "process" whose line is the largest value ≤ 50 (nearest enclosing). + let mut def_lines: HashMap<(&PathBuf, &str), Vec> = HashMap::new(); + for key in nodes.keys() { + def_lines.entry((&key.0, &key.1)).or_default().push(key.2); + } + for lines in def_lines.values_mut() { + lines.sort_unstable(); + } + + // Build path → language index to prevent cross-language false positives + let lang_index: HashMap<&PathBuf, &str> = + analyses.iter().map(|a| (&a.path, a.language)).collect(); + + for a in analyses { + for call in &a.calls { + // Fall back to pseudo-node for top-level calls + let caller_key = resolve_caller_key(a, call, &def_lines) + .unwrap_or_else(|| (a.path.clone(), "".to_string(), 0)); + // Resolve callee: same-file first, then cross-file (same language only) + let callee_keys = resolve_callee(a, call, &name_index, &lang_index); + for callee_key in callee_keys { + incoming + .entry(callee_key.clone()) + .or_default() + .insert(caller_key.clone()); + outgoing + .entry(caller_key.clone()) + .or_default() + .insert(callee_key); + } + } + } + + Self { + nodes, + incoming, + outgoing, + } + } + + pub fn definitions(&self, symbol: &str) -> Vec { + self.nodes + .values() + .filter(|n| n.name == symbol) + .map(|n| ChainLink { + file: n.file.clone(), + name: n.name.clone(), + line: n.line, + }) + .collect() + } + + pub fn incoming(&self, symbol: &str, depth: u32) -> Vec> { + let starts: Vec = self + .nodes + .keys() + .filter(|k| k.1 == symbol) + .cloned() + .collect(); + self.bfs_chains(&starts, depth, &self.incoming) + } + + pub fn outgoing(&self, symbol: &str, depth: u32) -> Vec> { + let starts: Vec = self + .nodes + .keys() + .filter(|k| k.1 == symbol) + .cloned() + .collect(); + self.bfs_chains(&starts, depth, &self.outgoing) + } + + fn bfs_chains( + &self, + starts: &[NodeKey], + depth: u32, + edges: &HashMap>, + ) -> Vec> { + if depth == 0 { + return vec![]; + } + + let mut chains = Vec::new(); + let mut queue: VecDeque<(Vec, u32)> = VecDeque::new(); + + for start in starts { + if let Some(neighbors) = edges.get(start) { + for neighbor in neighbors { + queue.push_back((vec![start.clone(), neighbor.clone()], 1)); + } + } + } + + while let Some((path, d)) = queue.pop_front() { + let Some(tip) = path.last() else { continue }; + + if d >= depth { + chains.push(self.to_chain_links(&path)); + continue; + } + + // Cycle detection: don't revisit nodes already in this path + let visited: HashSet<&NodeKey> = path.iter().collect(); + + match edges.get(tip) { + Some(neighbors) => { + let mut extended = false; + for neighbor in neighbors { + if !visited.contains(neighbor) { + let mut new_path = path.clone(); + new_path.push(neighbor.clone()); + queue.push_back((new_path, d + 1)); + extended = true; + } + } + if !extended { + chains.push(self.to_chain_links(&path)); + } + } + None => chains.push(self.to_chain_links(&path)), + } + } + + chains + } + + fn to_chain_links(&self, path: &[NodeKey]) -> Vec { + path.iter() + .map(|key| { + let node = self.nodes.get(key); + ChainLink { + file: key.0.clone(), + name: key.1.clone(), + line: node.map_or(0, |n| n.line), + } + }) + .collect() + } +} + +/// Given a call, find the NodeKey for the caller function. Uses the call's line +/// number to disambiguate when multiple functions share the same name in a file: +/// picks the definition whose line is the largest value ≤ call.line. +fn resolve_caller_key( + analysis: &FileAnalysis, + call: &Call, + def_lines: &HashMap<(&PathBuf, &str), Vec>, +) -> Option { + let caller_name = &call.caller; + if let Some(lines) = def_lines.get(&(&analysis.path, caller_name.as_str())) { + let line = match lines.binary_search(&call.line) { + Ok(idx) => lines[idx], + Err(0) => return None, // call is before any definition — shouldn't happen + Err(idx) => lines[idx - 1], + }; + Some((analysis.path.clone(), caller_name.clone(), line)) + } else { + None + } +} + +fn resolve_callee( + analysis: &FileAnalysis, + call: &Call, + name_index: &HashMap<&str, Vec>, + lang_index: &HashMap<&PathBuf, &str>, +) -> Vec { + let callee = &call.callee; + let caller_lang = analysis.language; + + // Strip scope prefix for qualified calls like Self::method(), Type::new(), + // HashMap::new(), module::func(). The name index is keyed on bare names + // (from Symbol.name), but call captures include the full scoped_identifier. + let bare_name = callee.rsplit("::").next().unwrap_or(callee); + + if let Some(keys) = name_index.get(bare_name) { + // Prefer same-file matches; when ambiguous pick nearest by line proximity + let same_file: Vec = keys + .iter() + .filter(|(path, _, _)| *path == analysis.path) + .cloned() + .collect(); + if !same_file.is_empty() { + if same_file.len() == 1 { + return same_file; + } + // Multiple same-file matches: pick nearest definition by line proximity + let nearest = same_file + .into_iter() + .min_by_key(|(_, _, line)| (call.line as i64 - *line as i64).unsigned_abs()) + .into_iter() + .collect(); + return nearest; + } + // Cross-file matches filtered to same language only + keys.iter() + .filter(|(path, _, _)| lang_index.get(path).copied() == Some(caller_lang)) + .cloned() + .collect() + } else { + vec![] + } +} diff --git a/crates/goose/src/agents/platform_extensions/analyze/languages.rs b/crates/goose/src/agents/platform_extensions/analyze/languages.rs new file mode 100644 index 000000000000..880d37e09736 --- /dev/null +++ b/crates/goose/src/agents/platform_extensions/analyze/languages.rs @@ -0,0 +1,326 @@ +use tree_sitter::Language; + +// ── Types ────────────────────────────────────────────────────────────── + +pub struct LangInfo { + pub name: &'static str, + pub extensions: &'static [&'static str], + pub language: fn() -> Language, + pub queries: LangQueries, + pub fn_kinds: &'static [&'static str], + pub fn_name_kinds: &'static [&'static str], + pub class_kinds: &'static [&'static str], +} + +pub struct LangQueries { + pub functions: &'static str, + pub classes: &'static str, + pub imports: &'static str, + pub calls: &'static str, +} + +// ── Language Registry ────────────────────────────────────────────────── + +static LANGUAGES: &[LangInfo] = &[ + LangInfo { + name: "rust", + extensions: &["rs"], + language: || tree_sitter_rust::LANGUAGE.into(), + fn_kinds: &["function_item"], + fn_name_kinds: &["identifier"], + class_kinds: &["impl_item", "struct_item", "trait_item", "enum_item"], + queries: LangQueries { + functions: r#" + (function_item name: (identifier) @name) + "#, + classes: r#" + (impl_item type: (type_identifier) @name) + (struct_item name: (type_identifier) @name) + (trait_item name: (type_identifier) @name) + (enum_item name: (type_identifier) @name) + "#, + imports: r#" + (use_declaration) @path + "#, + calls: r#" + (call_expression function: (identifier) @name) + (call_expression function: (field_expression field: (field_identifier) @name)) + (call_expression function: (scoped_identifier) @name) + (macro_invocation macro: (identifier) @name) + "#, + }, + }, + LangInfo { + name: "python", + extensions: &["py", "pyi"], + language: || tree_sitter_python::LANGUAGE.into(), + fn_kinds: &["function_definition"], + fn_name_kinds: &["identifier"], + class_kinds: &["class_definition"], + queries: LangQueries { + functions: r#" + (function_definition name: (identifier) @name) + "#, + classes: r#" + (class_definition name: (identifier) @name) + "#, + imports: r#" + (import_statement) @path + (import_from_statement) @path + "#, + calls: r#" + (call function: (identifier) @name) + (call function: (attribute attribute: (identifier) @name)) + (decorator (identifier) @name) + (decorator (attribute attribute: (identifier) @name)) + "#, + }, + }, + LangInfo { + name: "javascript", + extensions: &["js", "jsx", "mjs", "cjs"], + language: || tree_sitter_javascript::LANGUAGE.into(), + fn_kinds: &[ + "function_declaration", + "generator_function_declaration", + "method_definition", + "variable_declarator", + ], + fn_name_kinds: &["identifier", "property_identifier"], + class_kinds: &["class_declaration"], + queries: LangQueries { + functions: r#" + (function_declaration name: (identifier) @name) + (generator_function_declaration name: (identifier) @name) + (method_definition name: (property_identifier) @name) + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function))) + "#, + classes: r#" + (class_declaration name: (identifier) @name) + "#, + imports: r#" + (import_statement) @path + "#, + calls: r#" + (call_expression function: (identifier) @name) + (call_expression function: (member_expression property: (property_identifier) @name)) + (new_expression constructor: (identifier) @name) + "#, + }, + }, + LangInfo { + name: "typescript", + extensions: &["ts"], + language: || tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + fn_kinds: &[ + "function_declaration", + "generator_function_declaration", + "method_definition", + "variable_declarator", + ], + fn_name_kinds: &["identifier", "property_identifier"], + class_kinds: &["class_declaration", "interface_declaration"], + queries: LangQueries { + functions: r#" + (function_declaration name: (identifier) @name) + (generator_function_declaration name: (identifier) @name) + (method_definition name: (property_identifier) @name) + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function))) + "#, + classes: r#" + (class_declaration name: (type_identifier) @name) + (interface_declaration name: (type_identifier) @name) + "#, + imports: r#" + (import_statement) @path + "#, + calls: r#" + (call_expression function: (identifier) @name) + (call_expression function: (member_expression property: (property_identifier) @name)) + (new_expression constructor: (identifier) @name) + "#, + }, + }, + LangInfo { + name: "tsx", + extensions: &["tsx"], + language: || tree_sitter_typescript::LANGUAGE_TSX.into(), + fn_kinds: &[ + "function_declaration", + "generator_function_declaration", + "method_definition", + "variable_declarator", + ], + fn_name_kinds: &["identifier", "property_identifier"], + class_kinds: &["class_declaration", "interface_declaration"], + queries: LangQueries { + functions: r#" + (function_declaration name: (identifier) @name) + (generator_function_declaration name: (identifier) @name) + (method_definition name: (property_identifier) @name) + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function))) + "#, + classes: r#" + (class_declaration name: (type_identifier) @name) + (interface_declaration name: (type_identifier) @name) + "#, + imports: r#" + (import_statement) @path + "#, + calls: r#" + (call_expression function: (identifier) @name) + (call_expression function: (member_expression property: (property_identifier) @name)) + (new_expression constructor: (identifier) @name) + "#, + }, + }, + LangInfo { + name: "go", + extensions: &["go"], + language: || tree_sitter_go::LANGUAGE.into(), + fn_kinds: &["function_declaration", "method_declaration"], + fn_name_kinds: &["identifier", "field_identifier"], + class_kinds: &["type_declaration", "method_declaration"], + queries: LangQueries { + functions: r#" + (function_declaration name: (identifier) @name) + (method_declaration name: (field_identifier) @name) + "#, + classes: r#" + (type_declaration (type_spec name: (type_identifier) @name)) + "#, + imports: r#" + (import_declaration) @path + "#, + calls: r#" + (call_expression function: (identifier) @name) + (call_expression function: (selector_expression field: (field_identifier) @name)) + "#, + }, + }, + LangInfo { + name: "java", + extensions: &["java"], + language: || tree_sitter_java::LANGUAGE.into(), + fn_kinds: &["method_declaration", "constructor_declaration"], + fn_name_kinds: &["identifier"], + class_kinds: &[ + "class_declaration", + "interface_declaration", + "enum_declaration", + ], + queries: LangQueries { + functions: r#" + (method_declaration name: (identifier) @name) + (constructor_declaration name: (identifier) @name) + "#, + classes: r#" + (class_declaration name: (identifier) @name) + (interface_declaration name: (identifier) @name) + (enum_declaration name: (identifier) @name) + "#, + imports: r#" + (import_declaration) @path + "#, + calls: r#" + (method_invocation name: (identifier) @name) + (object_creation_expression type: (type_identifier) @name) + "#, + }, + }, + LangInfo { + name: "kotlin", + extensions: &["kt", "kts"], + language: || tree_sitter_kotlin_ng::LANGUAGE.into(), + fn_kinds: &["function_declaration"], + fn_name_kinds: &["identifier"], + class_kinds: &["class_declaration", "object_declaration"], + queries: LangQueries { + functions: r#" + (function_declaration name: (identifier) @name) + "#, + classes: r#" + (class_declaration name: (identifier) @name) + (object_declaration name: (identifier) @name) + "#, + imports: r#" + (import) @path + "#, + calls: r#" + (call_expression (identifier) @name) + (call_expression (navigation_expression (identifier) @name)) + "#, + }, + }, + LangInfo { + name: "swift", + extensions: &["swift"], + language: || tree_sitter_swift::LANGUAGE.into(), + fn_kinds: &[ + "function_declaration", + "init_declaration", + "deinit_declaration", + ], + fn_name_kinds: &["simple_identifier"], + class_kinds: &["class_declaration", "protocol_declaration"], + queries: LangQueries { + functions: r#" + (function_declaration name: (simple_identifier) @name) + "#, + classes: r#" + (class_declaration name: (type_identifier) @name) + (class_declaration name: (user_type (type_identifier) @name)) + (protocol_declaration name: (type_identifier) @name) + (protocol_declaration name: (user_type (type_identifier) @name)) + "#, + imports: r#" + (import_declaration) @path + "#, + calls: r#" + (call_expression (simple_identifier) @name) + (call_expression (navigation_expression suffix: (navigation_suffix suffix: (simple_identifier) @name))) + (constructor_expression (user_type (type_identifier) @name)) + "#, + }, + }, + LangInfo { + name: "ruby", + extensions: &["rb", "rake", "gemspec"], + language: || tree_sitter_ruby::LANGUAGE.into(), + fn_kinds: &["method", "singleton_method"], + fn_name_kinds: &["identifier"], + class_kinds: &["class", "module"], + queries: LangQueries { + functions: r#" + (method name: (identifier) @name) + (singleton_method name: (identifier) @name) + "#, + classes: r#" + (class name: (constant) @name) + (module name: (constant) @name) + "#, + imports: r#" + (call + method: (identifier) @_method + (#match? @_method "^(require|require_relative|load)$")) @path + "#, + calls: r#" + (call method: (identifier) @name) + (call receiver: (constant) @name) + "#, + }, + }, +]; + +pub fn lang_for_ext(ext: &str) -> Option<&'static LangInfo> { + LANGUAGES.iter().find(|l| l.extensions.contains(&ext)) +} diff --git a/crates/goose/src/agents/platform_extensions/analyze/mod.rs b/crates/goose/src/agents/platform_extensions/analyze/mod.rs new file mode 100644 index 000000000000..fe7edc23ae9a --- /dev/null +++ b/crates/goose/src/agents/platform_extensions/analyze/mod.rs @@ -0,0 +1,465 @@ +pub mod format; +pub mod graph; +pub mod languages; +pub mod parser; + +use crate::agents::extension::PlatformExtensionContext; +use crate::agents::mcp_client::{Error, McpClientTrait}; +use anyhow::Result; +use async_trait::async_trait; +use ignore::WalkBuilder; +use indoc::indoc; +use parser::{FileAnalysis, Parser}; +use rayon::prelude::*; +use rmcp::model::{ + CallToolResult, Content, Implementation, InitializeResult, JsonObject, ListToolsResult, + ProtocolVersion, ServerCapabilities, Tool, ToolAnnotations, ToolsCapability, +}; +use schemars::{schema_for, JsonSchema}; +use serde::Deserialize; +use serde_json::Value; +use std::path::{Path, PathBuf}; +use tokio_util::sync::CancellationToken; + +pub static EXTENSION_NAME: &str = "analyze"; + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct AnalyzeParams { + /// File or directory path to analyze + pub path: String, + /// Symbol name to focus on (triggers call graph mode) + #[serde(default)] + pub focus: Option, + /// Directory recursion depth limit (default 3, 0=unlimited). Also limits focus scan depth. + #[serde(default = "default_max_depth")] + pub max_depth: u32, + /// Call graph traversal depth (default 2, 0=definitions only) + #[serde(default = "default_follow_depth")] + pub follow_depth: u32, + /// Allow large outputs without size warning + #[serde(default)] + pub force: bool, +} + +fn default_max_depth() -> u32 { + 3 +} +fn default_follow_depth() -> u32 { + 2 +} + +pub struct AnalyzeClient { + info: InitializeResult, +} + +impl AnalyzeClient { + pub fn new(_context: PlatformExtensionContext) -> Result { + let info = InitializeResult { + protocol_version: ProtocolVersion::V_2025_03_26, + capabilities: ServerCapabilities { + tools: Some(ToolsCapability { + list_changed: Some(false), + }), + tasks: None, + resources: None, + extensions: None, + prompts: None, + completions: None, + experimental: None, + logging: None, + }, + server_info: Implementation { + name: EXTENSION_NAME.to_string(), + description: None, + title: Some("Analyze".to_string()), + version: "1.0.0".to_string(), + icons: None, + website_url: None, + }, + instructions: Some( + indoc! {" + Analyze code structure using tree-sitter AST parsing. Three auto-selected modes: + - Directory path → structure overview (file tree with function/class counts) + - File path → semantic details (functions, classes, imports, call counts) + - Any path + focus parameter → symbol call graph (incoming/outgoing chains) + + For large codebases, delegate analysis to a subagent and retain only the summary. + "} + .to_string(), + ), + }; + + Ok(Self { info }) + } + + fn schema() -> JsonObject { + serde_json::to_value(schema_for!(T)) + .expect("schema serialization should succeed") + .as_object() + .expect("schema should serialize to an object") + .clone() + } + + fn parse_args( + arguments: Option, + ) -> Result { + let value = arguments + .map(Value::Object) + .ok_or_else(|| "Missing arguments".to_string())?; + serde_json::from_value(value).map_err(|e| format!("Failed to parse arguments: {e}")) + } + + fn resolve_path(path: &str, working_dir: Option<&Path>) -> PathBuf { + let p = PathBuf::from(path); + if p.is_absolute() { + p + } else if let Some(cwd) = working_dir { + cwd.join(p) + } else { + p + } + } + + fn analyze(&self, params: AnalyzeParams, path: PathBuf) -> CallToolResult { + if !path.exists() { + return CallToolResult::error(vec![Content::text(format!( + "Error: path not found: {}", + path.display() + )) + .with_priority(0.0)]); + } + + if let Some(ref focus) = params.focus { + self.focused_mode( + &path, + focus, + params.follow_depth, + params.max_depth, + params.force, + ) + } else if path.is_file() { + self.semantic_mode(&path, params.force) + } else { + self.structure_mode(&path, params.max_depth, params.force) + } + } + + pub fn analyze_file(path: &Path) -> Option { + let source = std::fs::read_to_string(path).ok()?; + let parser = Parser::new(); + parser.analyze_file(path, &source) + } + + pub fn collect_files(dir: &Path, max_depth: u32) -> Vec { + let mut builder = WalkBuilder::new(dir); + if max_depth > 0 { + builder.max_depth(Some(max_depth as usize)); + } + builder + .build() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_some_and(|ft| ft.is_file())) + .map(|e| e.into_path()) + .collect() + } + + fn structure_mode(&self, dir: &Path, max_depth: u32, force: bool) -> CallToolResult { + let files = Self::collect_files(dir, max_depth); + let total_files = files.len(); + + let analyses: Vec = files + .par_iter() + .filter_map(|f| Self::analyze_file(f)) + .collect(); + + let output = format::format_structure(&analyses, dir, max_depth, total_files); + Self::finish(output, force) + } + + fn semantic_mode(&self, path: &Path, force: bool) -> CallToolResult { + match Self::analyze_file(path) { + Some(analysis) => { + let root = path.parent().unwrap_or(path); + let output = format::format_semantic(&analysis, root); + Self::finish(output, force) + } + None => CallToolResult::error(vec![Content::text(format!( + "Error: could not analyze {} (unsupported language or binary file)", + path.display() + )) + .with_priority(0.0)]), + } + } + + fn focused_mode( + &self, + path: &Path, + symbol: &str, + follow_depth: u32, + max_depth: u32, + force: bool, + ) -> CallToolResult { + let files = if path.is_file() { + vec![path.to_path_buf()] + } else { + Self::collect_files(path, max_depth) + }; + + let analyses: Vec = files + .par_iter() + .filter_map(|f| Self::analyze_file(f)) + .collect(); + + let root = if path.is_file() { + path.parent().unwrap_or(path) + } else { + path + }; + let g = graph::CallGraph::build(&analyses); + let output = format::format_focused(symbol, &g, follow_depth, analyses.len(), root); + Self::finish(output, force) + } + + fn finish(output: String, force: bool) -> CallToolResult { + match format::check_size(&output, force) { + Ok(text) => CallToolResult::success(vec![Content::text(text).with_priority(0.0)]), + Err(warning) => CallToolResult::error(vec![Content::text(warning).with_priority(0.0)]), + } + } +} + +#[async_trait] +impl McpClientTrait for AnalyzeClient { + async fn list_tools( + &self, + _session_id: &str, + _next_cursor: Option, + _cancellation_token: CancellationToken, + ) -> Result { + let tool = Tool::new( + "analyze".to_string(), + "Analyze code structure in 3 modes: 1) Directory overview - file tree with LOC/function/class counts to max_depth. 2) File details - functions, classes, imports. 3) Symbol focus - call graphs across directory to max_depth (requires file or directory path, case-sensitive). Typical flow: directory → files → symbols. Functions called >3x show •N.".to_string(), + Self::schema::(), + ) + .annotate(ToolAnnotations { + title: Some("Analyze".to_string()), + read_only_hint: Some(true), + destructive_hint: Some(false), + idempotent_hint: Some(true), + open_world_hint: Some(false), + }); + + Ok(ListToolsResult { + tools: vec![tool], + next_cursor: None, + meta: None, + }) + } + + async fn call_tool( + &self, + _session_id: &str, + name: &str, + arguments: Option, + working_dir: Option<&str>, + _cancellation_token: CancellationToken, + ) -> Result { + let working_dir = working_dir.map(Path::new); + match name { + "analyze" => match Self::parse_args::(arguments) { + Ok(params) => { + let path = Self::resolve_path(¶ms.path, working_dir); + Ok(self.analyze(params, path)) + } + Err(error) => Ok(CallToolResult::error(vec![Content::text(format!( + "Error: {error}" + )) + .with_priority(0.0)])), + }, + _ => Ok(CallToolResult::error(vec![Content::text(format!( + "Error: Unknown tool: {name}" + )) + .with_priority(0.0)])), + } + } + + fn get_info(&self) -> Option<&InitializeResult> { + Some(&self.info) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::session::SessionManager; + use rmcp::model::RawContent; + use std::fs; + use std::sync::Arc; + use tempfile::tempdir; + + fn ctx() -> PlatformExtensionContext { + PlatformExtensionContext { + extension_manager: None, + session_manager: Arc::new(SessionManager::new(std::env::temp_dir())), + session: None, + } + } + + fn text(result: &CallToolResult) -> &str { + match &result.content[0].raw { + RawContent::Text(t) => &t.text, + _ => panic!("expected text"), + } + } + + #[tokio::test] + async fn structure_mode() { + let tmp = tempdir().unwrap(); + fs::write( + tmp.path().join("lib.rs"), + "use std::io;\nfn read() {}\nfn write() {}\nstruct Buffer;\n", + ) + .unwrap(); + fs::write( + tmp.path().join("app.py"), + "import os\nclass App:\n pass\ndef main():\n pass\ndef run():\n pass\n", + ) + .unwrap(); + + let client = AnalyzeClient::new(ctx()).unwrap(); + let result = client.analyze( + AnalyzeParams { + path: tmp.path().to_str().unwrap().into(), + focus: None, + max_depth: 3, + follow_depth: 2, + force: false, + }, + tmp.path().to_path_buf(), + ); + let out = text(&result); + + assert!(out.contains("2 files")); + assert!(out.contains("F")); + assert!(out.contains("lib.rs")); + assert!(out.contains("app.py")); + assert!(out.contains("rust")); + assert!(out.contains("python")); + } + + #[tokio::test] + async fn semantic_mode() { + let tmp = tempdir().unwrap(); + let file = tmp.path().join("demo.rs"); + fs::write( + &file, + r#" +use std::collections::HashMap; +use std::io; + +struct Config; + +fn validate(x: i32) -> bool { x > 0 } +fn process() { + validate(1); + validate(2); + validate(3); + validate(4); + helper(); +} +fn helper() { validate(0); } +"#, + ) + .unwrap(); + + let client = AnalyzeClient::new(ctx()).unwrap(); + let result = client.analyze( + AnalyzeParams { + path: file.to_str().unwrap().into(), + focus: None, + max_depth: 3, + follow_depth: 2, + force: false, + }, + file.clone(), + ); + let out = text(&result); + + // Functions listed with signatures and line numbers + assert!(out.contains("F:")); + assert!(out.contains("validate(")); + assert!(out.contains("process:")); + assert!(out.contains("helper")); + // Struct + assert!(out.contains("C:")); + assert!(out.contains("Config:")); + // Imports + assert!(out.contains("I:")); + assert!(out.contains("std::collections::HashMap")); + // validate called 5 times (>3) → •5 + assert!(out.contains("validate(") && out.contains("•5")); + } + + #[tokio::test] + async fn focused_mode() { + let tmp = tempdir().unwrap(); + fs::write(tmp.path().join("a.rs"), "fn process() { validate(1); }\n").unwrap(); + fs::write(tmp.path().join("b.rs"), "fn validate() { process(); }\n").unwrap(); + + let client = AnalyzeClient::new(ctx()).unwrap(); + let result = client.analyze( + AnalyzeParams { + path: tmp.path().to_str().unwrap().into(), + focus: Some("process".into()), + max_depth: 3, + follow_depth: 2, + force: false, + }, + tmp.path().to_path_buf(), + ); + let out = text(&result); + + assert!(out.contains("FOCUS: process")); + assert!(out.contains("DEF")); + assert!(out.contains("IN") || out.contains("OUT")); + assert!(out.contains("files analyzed")); + } + + #[tokio::test] + async fn error_and_edge() { + let client = AnalyzeClient::new(ctx()).unwrap(); + + // Nonexistent path + let result = client.analyze( + AnalyzeParams { + path: "/no/such/path".into(), + focus: None, + max_depth: 3, + follow_depth: 2, + force: false, + }, + PathBuf::from("/no/such/path"), + ); + assert_eq!(result.is_error, Some(true)); + assert!(text(&result).contains("path not found")); + + // Empty directory → 0 files + let tmp = tempdir().unwrap(); + let result = client.analyze( + AnalyzeParams { + path: tmp.path().to_str().unwrap().into(), + focus: None, + max_depth: 3, + follow_depth: 2, + force: false, + }, + tmp.path().to_path_buf(), + ); + assert!(text(&result).contains("0 files")); + + // Size guard + let big = "x".repeat(60_000); + assert!(format::check_size(&big, false).is_err()); + assert!(format::check_size(&big, true).is_ok()); + } +} diff --git a/crates/goose/src/agents/platform_extensions/analyze/parser.rs b/crates/goose/src/agents/platform_extensions/analyze/parser.rs new file mode 100644 index 000000000000..d26d0030cc95 --- /dev/null +++ b/crates/goose/src/agents/platform_extensions/analyze/parser.rs @@ -0,0 +1,758 @@ +use std::path::{Path, PathBuf}; +use tree_sitter::{Language, Parser as TsParser, Query, QueryCursor, StreamingIterator}; + +use super::languages::{lang_for_ext, LangInfo}; + +// ── Types ────────────────────────────────────────────────────────────── + +pub struct FileAnalysis { + pub path: PathBuf, + pub language: &'static str, + pub loc: usize, + pub functions: Vec, + pub classes: Vec, + pub imports: Vec, + pub calls: Vec, +} + +pub struct Symbol { + pub name: String, + pub line: usize, + pub parent: Option, + pub detail: Option, +} + +pub struct Import { + pub module: String, + pub count: usize, +} + +pub struct Call { + pub caller: String, + pub callee: String, + pub line: usize, +} + +// ── Parser ───────────────────────────────────────────────────────────── + +#[derive(Default)] +pub struct Parser; + +impl Parser { + pub fn new() -> Self { + Self + } + + pub fn analyze_file(&self, path: &Path, source: &str) -> Option { + let ext = path.extension()?.to_str()?; + let info = lang_for_ext(ext)?; + let lang = (info.language)(); + + let mut parser = TsParser::new(); + parser.set_language(&lang).ok()?; + let tree = parser.parse(source, None)?; + let root = tree.root_node(); + + let loc = source.lines().count(); + let functions = extract_functions(&lang, info, root, source); + let classes = extract_classes(&lang, info, root, source); + let imports = extract_imports(&lang, info.queries.imports, root, source); + let calls = extract_calls(&lang, info.queries.calls, root, source, info); + + Some(FileAnalysis { + path: path.to_path_buf(), + language: info.name, + loc, + functions, + classes, + imports, + calls, + }) + } +} + +// ── Query Runners ────────────────────────────────────────────────────── + +fn extract_functions( + lang: &Language, + info: &LangInfo, + root: tree_sitter::Node, + source: &str, +) -> Vec { + let Ok(query) = Query::new(lang, info.queries.functions) else { + return vec![]; + }; + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, root, source.as_bytes()); + let mut symbols = Vec::new(); + + while let Some(m) = matches.next() { + for cap in m.captures { + if query.capture_names()[cap.index as usize] == "name" { + let name = node_text(source, &cap.node).to_string(); + let line = cap.node.start_position().row + 1; + let parent = find_enclosing_class(cap.node, source, info); + let detail = extract_fn_signature(cap.node, source); + symbols.push(Symbol { + name, + line, + parent, + detail, + }); + } + } + } + + // Swift init/deinit declarations don't have a name child, so the query + // can't capture them. Walk the tree to find them and add as symbols. + if info.name == "swift" { + collect_init_deinit(root, source, info, &mut symbols); + } + + symbols +} + +/// Recursively collect Swift init_declaration and deinit_declaration nodes. +fn collect_init_deinit( + node: tree_sitter::Node, + source: &str, + info: &LangInfo, + symbols: &mut Vec, +) { + for i in 0..node.child_count() as u32 { + if let Some(child) = node.child(i) { + match child.kind() { + "init_declaration" => { + symbols.push(Symbol { + name: "init".to_string(), + line: child.start_position().row + 1, + parent: find_enclosing_class(child, source, info), + detail: extract_fn_signature_from_node(child, source), + }); + } + "deinit_declaration" => { + symbols.push(Symbol { + name: "deinit".to_string(), + line: child.start_position().row + 1, + parent: find_enclosing_class(child, source, info), + detail: extract_fn_signature_from_node(child, source), + }); + } + _ => {} + } + // Recurse into class/protocol bodies to find nested init/deinit + collect_init_deinit(child, source, info, symbols); + } + } +} + +fn extract_classes( + lang: &Language, + info: &LangInfo, + root: tree_sitter::Node, + source: &str, +) -> Vec { + let Ok(query) = Query::new(lang, info.queries.classes) else { + return vec![]; + }; + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, root, source.as_bytes()); + let mut symbols = Vec::new(); + + while let Some(m) = matches.next() { + for cap in m.captures { + if query.capture_names()[cap.index as usize] == "name" { + let name_text = node_text(source, &cap.node).to_string(); + let line = cap.node.start_position().row + 1; + + let inheritance = cap + .node + .parent() + .map(|p| extract_inheritance(info.name, &p, source)) + .filter(|s| !s.is_empty()); + + let fields = extract_class_detail(cap.node, source, info); + let detail = match (&inheritance, &fields) { + (Some(inh), Some(f)) => Some(format!("({}) {}", inh, f)), + (Some(inh), None) => Some(format!("({})", inh)), + (None, Some(f)) => Some(f.clone()), + (None, None) => None, + }; + + symbols.push(Symbol { + name: name_text, + line, + parent: None, + detail, + }); + } + } + } + symbols +} + +/// Extract the superclass / extends / implements target from a class declaration node. +/// Returns an empty string if no inheritance is detected. +/// The `class_node` is the parent of the name node (i.e. the full class declaration). +fn extract_inheritance(lang_name: &str, class_node: &tree_sitter::Node, source: &str) -> String { + match lang_name { + // Python: class Foo(Bar, Baz) → argument_list sibling + "python" => { + if let Some(supers) = find_child_by_kind(class_node, "argument_list") { + let super_text = node_text(source, &supers).trim(); + let inner = super_text + .strip_prefix('(') + .and_then(|s| s.strip_suffix(')')) + .unwrap_or(""); + if !inner.is_empty() { + return inner.to_string(); + } + } + String::new() + } + + // TypeScript / TSX: class Foo extends Bar implements Baz { ... } + // class_declaration → class_heritage → extends_clause → type_identifier + // interface_declaration → extends_type_clause → type_identifier + "typescript" | "tsx" => { + if let Some(heritage) = find_child_by_kind(class_node, "class_heritage") { + if let Some(extends_clause) = find_child_by_kind(&heritage, "extends_clause") { + if let Some(ti) = find_descendant_by_kind(&extends_clause, "type_identifier") + .or_else(|| find_descendant_by_kind(&extends_clause, "identifier")) + { + return node_text(source, &ti).to_string(); + } + } + } + if let Some(extends_clause) = find_child_by_kind(class_node, "extends_type_clause") { + if let Some(ti) = find_descendant_by_kind(&extends_clause, "type_identifier") + .or_else(|| find_descendant_by_kind(&extends_clause, "identifier")) + { + return node_text(source, &ti).to_string(); + } + } + String::new() + } + + // JavaScript: class Foo extends Bar { ... } + // class_declaration → class_heritage → identifier | member_expression | call_expression + "javascript" => { + if let Some(heritage) = find_child_by_kind(class_node, "class_heritage") { + // Get the full extends expression (could be identifier, member_expression, etc.) + for i in 0..heritage.child_count() as u32 { + if let Some(child) = heritage.child(i) { + let text = node_text(source, &child).trim(); + if !text.is_empty() && text != "extends" { + return text.to_string(); + } + } + } + } + String::new() + } + + // Java: class Foo extends Bar implements Baz { ... } + // class_declaration → superclass → type_identifier + "java" => { + if let Some(superclass) = find_child_by_kind(class_node, "superclass") { + if let Some(ti) = find_descendant_by_kind(&superclass, "type_identifier") + .or_else(|| find_descendant_by_kind(&superclass, "identifier")) + { + return node_text(source, &ti).to_string(); + } + } + if let Some(extends) = find_child_by_kind(class_node, "extends_interfaces") { + if let Some(ti) = find_descendant_by_kind(&extends, "type_identifier") + .or_else(|| find_descendant_by_kind(&extends, "identifier")) + { + return node_text(source, &ti).to_string(); + } + } + String::new() + } + + // Kotlin: class Foo : Bar(), Baz { ... } + // class_declaration → delegation_specifiers → delegation_specifier → user_type → type_identifier + "kotlin" => { + if let Some(specs) = find_child_by_kind(class_node, "delegation_specifiers") { + if let Some(spec) = find_child_by_kind(&specs, "delegation_specifier") { + // Try user_type → type_identifier first + if let Some(ut) = find_child_by_kind(&spec, "user_type") { + if let Some(ti) = find_descendant_by_kind(&ut, "type_identifier") + .or_else(|| find_descendant_by_kind(&ut, "identifier")) + { + return node_text(source, &ti).to_string(); + } + } + // Fallback: constructor_invocation → user_type + if let Some(ci) = find_child_by_kind(&spec, "constructor_invocation") { + if let Some(ut) = find_child_by_kind(&ci, "user_type") { + if let Some(ti) = find_descendant_by_kind(&ut, "type_identifier") + .or_else(|| find_descendant_by_kind(&ut, "identifier")) + { + return node_text(source, &ti).to_string(); + } + } + } + } + } + String::new() + } + + // Ruby: class Foo < Bar + // class → superclass → constant | scope_resolution + "ruby" => { + if let Some(superclass) = find_child_by_kind(class_node, "superclass") { + if let Some(c) = find_child_by_kind(&superclass, "scope_resolution") { + return node_text(source, &c).to_string(); + } + if let Some(c) = find_child_by_kind(&superclass, "constant") { + return node_text(source, &c).to_string(); + } + } + String::new() + } + + // Swift: class Foo: Bar, Protocol { ... } + // class_declaration → inheritance_specifier → type_identifier + "swift" => { + if let Some(inh) = find_child_by_kind(class_node, "inheritance_specifier") { + if let Some(ti) = find_descendant_by_kind(&inh, "user_type") { + if let Some(id) = find_descendant_by_kind(&ti, "type_identifier") { + return node_text(source, &id).to_string(); + } + } + if let Some(ti) = find_descendant_by_kind(&inh, "type_identifier") { + return node_text(source, &ti).to_string(); + } + } + String::new() + } + + // Rust: impl Display for MyType → "MyType(impl Display)" + // impl_item with "for" keyword: trait is the first type_identifier, type is after "for" + "rust" => { + if class_node.kind() != "impl_item" { + return String::new(); + } + let mut has_for = false; + for i in 0..class_node.child_count() as u32 { + if let Some(child) = class_node.child(i) { + if node_text(source, &child) == "for" { + has_for = true; + break; + } + } + } + if !has_for { + // Inherent impl (no trait) — return "impl" to distinguish from struct definition + return "impl".to_string(); + } + let mut trait_name = String::new(); + let mut found_for = false; + for i in 0..class_node.child_count() as u32 { + if let Some(child) = class_node.child(i) { + if node_text(source, &child) == "for" { + found_for = true; + } else if !found_for + && (child.kind() == "type_identifier" + || child.kind() == "scoped_type_identifier" + || child.kind() == "generic_type") + { + trait_name = node_text(source, &child).to_string(); + } + } + } + if !trait_name.is_empty() { + return format!("impl {}", trait_name); + } + String::new() + } + + _ => String::new(), + } +} + +/// Walk up from a function node to find the nearest enclosing class-like container. +fn find_enclosing_class(node: tree_sitter::Node, source: &str, info: &LangInfo) -> Option { + let mut cur = node; + while let Some(parent) = cur.parent() { + if info.class_kinds.contains(&parent.kind()) { + if parent.kind() == "impl_item" { + // For trait impls (impl Trait for Type), get the type after "for" + let mut found_for = false; + for i in 0..parent.child_count() as u32 { + if let Some(child) = parent.child(i) { + if node_text(source, &child) == "for" { + found_for = true; + } else if found_for + && (child.kind() == "type_identifier" + || child.kind() == "generic_type" + || child.kind() == "scoped_type_identifier") + { + return Some(node_text(source, &child).to_string()); + } + } + } + // Inherent impl — first type_identifier is the type itself + return find_child_by_kind(&parent, "type_identifier") + .map(|n| node_text(source, &n).to_string()); + } + // Go method_declaration: func (r *ReceiverType) Method() — extract receiver type + if parent.kind() == "method_declaration" { + if let Some(params) = find_child_by_kind(&parent, "parameter_list") { + if let Some(ti) = find_descendant_by_kind(¶ms, "type_identifier") { + return Some(node_text(source, &ti).to_string()); + } + } + return None; + } + // For Go type_declaration, look inside type_spec + if parent.kind() == "type_declaration" { + for i in 0..parent.child_count() as u32 { + if let Some(child) = parent.child(i) { + if child.kind() == "type_spec" { + return find_child_by_kind(&child, "type_identifier") + .map(|n| node_text(source, &n).to_string()); + } + } + } + return None; + } + let name_kinds = &[ + "identifier", + "type_identifier", + "constant", + "simple_identifier", + ]; + for kind in name_kinds { + if let Some(n) = find_child_by_kind(&parent, kind) { + return Some(node_text(source, &n).to_string()); + } + } + return None; + } + cur = parent; + } + None +} + +/// Extract a compact function signature: "(params) -> ReturnType" +fn extract_fn_signature(name_node: tree_sitter::Node, source: &str) -> Option { + let fn_node = name_node.parent()?; + extract_fn_signature_from_node(fn_node, source) +} + +/// Extract a compact function signature directly from the function node. +/// Used for nodes like Swift init/deinit that don't have a name child. +fn extract_fn_signature_from_node(fn_node: tree_sitter::Node, source: &str) -> Option { + let mut parts = String::new(); + + let param_kinds = &[ + "parameters", + "formal_parameters", + "parameter_list", + "function_value_parameters", + "method_parameters", + "lambda_parameters", + ]; + let params_node = param_kinds + .iter() + .find_map(|kind| find_child_by_kind(&fn_node, kind)); + + if let Some(pn) = params_node { + let raw = node_text(source, &pn); + if raw.len() <= 60 { + parts.push_str(raw); + } else { + let count = raw.matches(',').count() + 1; + parts.push_str(&format!("({} args)", count)); + } + } else { + parts.push_str("()"); + } + + let ret_kinds = &["type", "return_type", "type_annotation"]; + // For Rust: look for a child that is "->" followed by a type + // For Python: look for "return_type" or "type" child + // Generic approach: scan children for return type indicators + for i in 0..fn_node.child_count() as u32 { + if let Some(child) = fn_node.child(i) { + if ret_kinds.contains(&child.kind()) { + let ret_text = node_text(source, &child).trim().to_string(); + if !ret_text.is_empty() { + let ret_text = ret_text + .trim_start_matches("->") + .trim_start_matches(':') + .trim(); + if !ret_text.is_empty() { + parts.push_str("->"); + parts.push_str(&truncate(ret_text, 30)); + } + } + break; + } + // Rust uses "->" as a literal anonymous child, then a type child follows + if node_text(source, &child) == "->" { + if let Some(type_child) = fn_node.child(i + 1) { + let ret_text = node_text(source, &type_child).trim(); + if !ret_text.is_empty() { + parts.push_str("->"); + parts.push_str(&truncate(ret_text, 30)); + } + } + break; + } + } + } + + if parts == "()" { + return None; + } + Some(parts) +} + +/// Extract a compact detail for a class/struct: field names or count. +fn extract_class_detail( + name_node: tree_sitter::Node, + source: &str, + info: &LangInfo, +) -> Option { + let class_node = name_node.parent()?; + + let (body_kinds, field_kinds): (&[&str], &[&str]) = match info.name { + "rust" => (&["field_declaration_list"], &["field_declaration"]), + "go" => ( + &["field_declaration_list", "struct_type"], + &["field_declaration"], + ), + "java" | "kotlin" => (&["class_body"], &["field_declaration"]), + _ => return None, // Skip Python (hard), JS/TS/Ruby/Swift for now + }; + + let body = body_kinds + .iter() + .find_map(|kind| find_descendant_by_kind(&class_node, kind))?; + + let mut fields: Vec = Vec::new(); + collect_field_names(&body, field_kinds, source, &mut fields); + + if fields.is_empty() { + return None; + } + + if fields.len() <= 5 { + Some(format!("{{{}}}", fields.join(","))) + } else { + Some(format!("{{{}f}}", fields.len())) + } +} + +fn collect_field_names( + node: &tree_sitter::Node, + field_kinds: &[&str], + source: &str, + out: &mut Vec, +) { + for i in 0..node.child_count() as u32 { + if let Some(child) = node.child(i) { + if field_kinds.contains(&child.kind()) { + // Java/Kotlin: field name is inside variable_declarator, not a direct child. + // e.g. (field_declaration type: (type_identifier) declarator: (variable_declarator name: (identifier))) + if let Some(vd) = find_child_by_kind(&child, "variable_declarator") { + if let Some(n) = find_child_by_kind(&vd, "identifier") { + out.push(node_text(source, &n).to_string()); + continue; + } + } + // Default: direct child lookup (Rust, Go, etc.) + let name_kinds = &["field_identifier", "identifier", "type_identifier"]; + for nk in name_kinds { + if let Some(n) = find_child_by_kind(&child, nk) { + out.push(node_text(source, &n).to_string()); + break; + } + } + } + } + } +} + +fn find_child_by_kind<'a>( + node: &tree_sitter::Node<'a>, + kind: &str, +) -> Option> { + (0..node.child_count() as u32) + .filter_map(|i| node.child(i)) + .find(|c| c.kind() == kind) +} + +fn find_descendant_by_kind<'a>( + node: &tree_sitter::Node<'a>, + kind: &str, +) -> Option> { + if node.kind() == kind { + return Some(*node); + } + for i in 0..node.child_count() as u32 { + if let Some(child) = node.child(i) { + if let Some(found) = find_descendant_by_kind(&child, kind) { + return Some(found); + } + } + } + None +} + +fn extract_imports( + lang: &Language, + query_src: &str, + root: tree_sitter::Node, + source: &str, +) -> Vec { + let Ok(query) = Query::new(lang, query_src) else { + return vec![]; + }; + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, root, source.as_bytes()); + let mut imports: Vec = Vec::new(); + + while let Some(m) = matches.next() { + for cap in m.captures { + if query.capture_names()[cap.index as usize] != "path" { + continue; + } + let raw = node_text(source, &cap.node); + let module = normalize_import(raw.trim()); + if let Some(existing) = imports.iter_mut().find(|i| i.module == module) { + existing.count += 1; + } else { + imports.push(Import { module, count: 1 }); + } + } + } + imports +} + +fn normalize_import(s: &str) -> String { + let s = s + .trim_start_matches("use ") + .trim_start_matches("import ") + .trim_start_matches("from ") + .trim_start_matches("require_relative ") + .trim_start_matches("require ") + .trim_start_matches("load ") + .trim_end_matches(';') + .trim() + .trim_matches(|c| c == '\'' || c == '"'); + // Handle Python "from X import Y" → keep just "X" + let s = s.split(" import ").next().unwrap_or(s); + // JS/TS: "React from 'react'" or "{ useState } from 'react'" → "react" + // After stripping "import " prefix, we have "React from 'react'" + // Extract the module path after " from " and strip quotes + if let Some(idx) = s.find(" from ") { + let module = s + .get(idx + 6..) + .unwrap_or("") + .trim() + .trim_matches(|c: char| c == '\'' || c == '"'); + if !module.is_empty() { + return module.to_string(); + } + } + // Rust: strip brace groups like "std::collections::{HashMap, HashSet}" + match s.find("::{") { + Some(i) => s.get(..i).unwrap_or(s).to_string(), + None => s.to_string(), + } +} + +fn extract_calls( + lang: &Language, + query_src: &str, + root: tree_sitter::Node, + source: &str, + info: &LangInfo, +) -> Vec { + let Ok(query) = Query::new(lang, query_src) else { + return vec![]; + }; + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, root, source.as_bytes()); + let mut calls = Vec::new(); + + while let Some(m) = matches.next() { + for cap in m.captures { + if query.capture_names()[cap.index as usize] != "name" { + continue; + } + let callee = node_text(source, &cap.node).to_string(); + let line = cap.node.start_position().row + 1; + let caller = + find_enclosing_fn(cap.node, source, info).unwrap_or_else(|| "".to_string()); + calls.push(Call { + caller, + callee, + line, + }); + } + } + calls +} + +fn find_enclosing_fn(node: tree_sitter::Node, source: &str, info: &LangInfo) -> Option { + let mut cur = node; + while let Some(parent) = cur.parent() { + if info.fn_kinds.contains(&parent.kind()) { + // Special case: Swift init/deinit + if parent.kind() == "init_declaration" { + return Some("init".into()); + } + if parent.kind() == "deinit_declaration" { + return Some("deinit".into()); + } + // variable_declarator: only treat as function scope if value is arrow/function + if parent.kind() == "variable_declarator" { + let is_fn_value = find_child_by_kind(&parent, "arrow_function").is_some() + || find_child_by_kind(&parent, "function").is_some(); + if !is_fn_value { + cur = parent; + continue; // skip — not a function declarator + } + } + // If this is an anonymous function-like node (closure, async block, arrow + // function with no name), keep walking up to find the enclosing named function. + if let Some(name) = find_child_text(&parent, info.fn_name_kinds, source) { + return Some(name); + } + } + cur = parent; + } + None +} + +fn find_child_text(node: &tree_sitter::Node, kinds: &[&str], source: &str) -> Option { + (0..node.child_count() as u32) + .filter_map(|i| node.child(i)) + .find(|c| kinds.contains(&c.kind())) + .map(|c| node_text(source, &c).to_string()) +} + +/// Truncate a string to at most `max` chars, appending "..." if truncated. +fn truncate(s: &str, max: usize) -> String { + if s.len() <= max { + return s.to_string(); + } + let limit = max.saturating_sub(3); + // Walk back to a valid char boundary + let end = (0..=limit) + .rev() + .find(|&i| s.is_char_boundary(i)) + .unwrap_or(0); + let prefix = s.get(..end).unwrap_or(""); + format!("{}...", prefix) +} + +fn node_text<'a>(source: &'a str, node: &tree_sitter::Node) -> &'a str { + source.get(node.byte_range()).unwrap_or("") +} diff --git a/crates/goose/src/agents/platform_extensions/mod.rs b/crates/goose/src/agents/platform_extensions/mod.rs index 384a8e24a170..69605746e967 100644 --- a/crates/goose/src/agents/platform_extensions/mod.rs +++ b/crates/goose/src/agents/platform_extensions/mod.rs @@ -1,3 +1,4 @@ +pub mod analyze; pub mod apps; pub mod chatrecall; #[cfg(feature = "code-mode")] @@ -26,6 +27,19 @@ pub static PLATFORM_EXTENSIONS: Lazy || { let mut map = HashMap::new(); + map.insert( + analyze::EXTENSION_NAME, + PlatformExtensionDef { + name: analyze::EXTENSION_NAME, + display_name: "Analyze", + description: + "Analyze code structure with tree-sitter: directory overviews, file details, symbol call graphs", + default_enabled: true, + unprefixed_tools: true, + client_factory: |ctx| Box::new(analyze::AnalyzeClient::new(ctx).unwrap()), + }, + ); + map.insert( todo::EXTENSION_NAME, PlatformExtensionDef { diff --git a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__all_platform_extensions.snap b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__all_platform_extensions.snap index 73ad29d38c9c..6012f5361285 100644 --- a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__all_platform_extensions.snap +++ b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__all_platform_extensions.snap @@ -35,6 +35,16 @@ to discover what extensions can help. Use manage_extensions to enable or disable specific extensions by name. Use list_resources and read_resource to work with extension data and resources. +## analyze + +### Instructions +Analyze code structure using tree-sitter AST parsing. Three auto-selected modes: +- Directory path → structure overview (file tree with function/class counts) +- File path → semantic details (functions, classes, imports, call counts) +- Any path + focus parameter → symbol call graph (incoming/outgoing chains) + +For large codebases, delegate analysis to a subagent and retain only the summary. + ## apps apps supports resources. diff --git a/crates/goose/src/bin/analyze_cli.rs b/crates/goose/src/bin/analyze_cli.rs new file mode 100644 index 000000000000..a8df0e607088 --- /dev/null +++ b/crates/goose/src/bin/analyze_cli.rs @@ -0,0 +1,93 @@ +//! CLI wrapper for the analyze platform extension. +//! Usage: cargo run -p goose --bin analyze_cli -- [--focus ] [--depth ] [--follow ] [--force] + +use clap::Parser; +use goose::agents::platform_extensions::analyze::{format, graph, AnalyzeClient}; +use rayon::prelude::*; +use std::path::PathBuf; + +#[derive(Parser)] +#[command(name = "analyze_cli", about = "Ad-hoc code analysis via tree-sitter")] +struct Cli { + /// File or directory path to analyze + path: PathBuf, + /// Symbol name to focus on (triggers call graph mode) + #[arg(long)] + focus: Option, + /// Directory recursion depth limit (default 3, 0=unlimited) + #[arg(long, default_value_t = 3)] + depth: u32, + /// Call graph traversal depth (default 2, 0=definitions only) + #[arg(long, default_value_t = 2)] + follow: u32, + /// Allow large outputs without size warning + #[arg(long)] + force: bool, +} + +fn main() { + let cli = Cli::parse(); + let path = if cli.path.is_absolute() { + cli.path.clone() + } else { + std::env::current_dir().unwrap().join(&cli.path) + }; + + if !path.exists() { + eprintln!("Error: path not found: {}", path.display()); + std::process::exit(1); + } + + let output = if let Some(ref symbol) = cli.focus { + // Focused mode: symbol call graph + let files = if path.is_file() { + vec![path.clone()] + } else { + AnalyzeClient::collect_files(&path, cli.depth) + }; + let analyses: Vec<_> = files + .par_iter() + .filter_map(|f| AnalyzeClient::analyze_file(f)) + .collect(); + let root = if path.is_file() { + path.parent().unwrap_or(&path) + } else { + &path + }; + let g = graph::CallGraph::build(&analyses); + format::format_focused(symbol, &g, cli.follow, analyses.len(), root) + } else if path.is_file() { + // Semantic mode: single file details + match AnalyzeClient::analyze_file(&path) { + Some(analysis) => { + let root = path.parent().unwrap_or(&path); + format::format_semantic(&analysis, root) + } + None => { + eprintln!( + "Error: unsupported language or binary file: {}", + path.display() + ); + std::process::exit(1); + } + } + } else { + // Structure mode: directory overview + let files = AnalyzeClient::collect_files(&path, cli.depth); + let total_files = files.len(); + let analyses: Vec<_> = files + .par_iter() + .filter_map(|f| AnalyzeClient::analyze_file(f)) + .collect(); + format::format_structure(&analyses, &path, cli.depth, total_files) + }; + + match format::check_size(&output, cli.force) { + Ok(text) => print!("{text}"), + Err(warning) => { + eprintln!("{warning}"); + eprintln!("(use --force to see full output)"); + std::process::exit(2); + } + } +}