Skip to content

Commit 57a13d8

Browse files
authored
Add XML feature for Claude compatibility (#21)
1 parent 1e1dca8 commit 57a13d8

File tree

6 files changed

+247
-24
lines changed

6 files changed

+247
-24
lines changed

readme.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ A blazingly fast tool for peeking at codebases. Perfect for loading your codebas
1616
- 🔗 Web content processing with Markdown conversion
1717
- 📦 Git repository support
1818
- 🌐 URL traversal with configurable depth
19+
- 🏷️ XML output format for better LLM compatibility
1920

2021
## Installation
2122

@@ -115,6 +116,9 @@ glimpse --config_path
115116

116117
# Initialize a .glimpse config file in the current directory
117118
glimpse --config
119+
120+
# Output in XML format for better LLM compatibility
121+
glimpse -x /path/to/project
118122
```
119123

120124
## CLI Options
@@ -146,6 +150,7 @@ Options:
146150
--traverse-links Traverse links when processing URLs
147151
--link-depth <DEPTH> Maximum depth to traverse links (default: 1)
148152
--pdf <PATH> Save output as PDF
153+
-x, --xml Output in XML format for better LLM compatibility
149154
-h, --help Print help
150155
-V, --version Print version
151156
```
@@ -179,6 +184,60 @@ default_excludes = [
179184
]
180185
```
181186

187+
## XML Output Format
188+
189+
Glimpse supports XML output format designed for better compatibility with Large Language Models (LLMs) like Claude, GPT, and others. When using the `-x` or `--xml` flag, the output is structured with clear XML tags that help LLMs better understand the context and structure of your codebase.
190+
191+
### XML Structure
192+
193+
The XML output wraps all content in a `<context>` tag with the project name:
194+
195+
```xml
196+
<context name="my_project">
197+
<tree>
198+
└── src/
199+
└── main.rs
200+
</tree>
201+
202+
<files>
203+
<file path="src/main.rs">
204+
================================================
205+
fn main() {
206+
println!("Hello, World!");
207+
}
208+
</file>
209+
</files>
210+
211+
<summary>
212+
Total files: 1
213+
Total size: 45 bytes
214+
</summary>
215+
</context>
216+
```
217+
218+
### Benefits for LLM Usage
219+
220+
- **Clear Context Boundaries**: The `<context>` wrapper helps LLMs understand where your codebase begins and ends
221+
- **Structured Information**: Separate sections for directory tree, file contents, and summary
222+
- **Escaped Attributes**: Project names and file paths are XML-escaped inside tag attributes; file contents are included verbatim so code stays readable
223+
- **Project Identification**: Automatic project name detection for better context
224+
225+
### Usage Examples
226+
227+
```bash
228+
# Basic XML output
229+
glimpse -x /path/to/project
230+
231+
# XML output with file save
232+
glimpse -x -f project.xml /path/to/project
233+
234+
# XML output to stdout
235+
glimpse -x --print /path/to/project
236+
237+
# XML output with specific includes
238+
glimpse -x -i "*.rs,*.py" /path/to/project
239+
```
240+
182241
## Token Counting
183242

184243
Glimpse supports two tokenizer backends:

src/analyzer.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,15 @@ pub fn process_directory(args: &Cli) -> Result<()> {
4141
fs::write(pdf_path, pdf_data)?;
4242
println!("PDF output written to: {}", pdf_path.display());
4343
} else {
44+
// Determine project name for XML output
45+
let project_name = if args.xml {
46+
Some(determine_project_name(&args.paths))
47+
} else {
48+
None
49+
};
50+
4451
// Handle output (print/copy/save)
45-
let output = generate_output(&entries, output_format)?;
52+
let output = generate_output(&entries, output_format, args.xml, project_name)?;
4653
handle_output(output, args)?;
4754
}
4855

@@ -54,6 +61,33 @@ pub fn process_directory(args: &Cli) -> Result<()> {
5461
Ok(())
5562
}
5663

64+
/// Derives a human-readable project name for the XML `<context name="...">` attribute.
///
/// Only the first entry of `paths` is considered:
/// * an existing directory yields its own final path component;
/// * an existing file yields the final component of its parent directory;
/// * anything else (for example a URL or a path that does not exist) is returned unchanged.
///
/// Inputs such as `.`, `..`, `some/dir/..` or a bare relative file name have no final
/// component to read lexically, so the base directory is canonicalized to recover a
/// real directory name instead of emitting `"."` or the raw argument.
///
/// Returns `"project"` when `paths` is empty.
fn determine_project_name(paths: &[String]) -> String {
    let first_path = match paths.first() {
        Some(p) => p,
        None => return "project".to_string(),
    };
    let path = std::path::Path::new(first_path);

    // The directory whose name should label the project, if the path exists on disk.
    let base_dir = if path.is_dir() {
        Some(path)
    } else if path.is_file() {
        path.parent()
    } else {
        None
    };

    if let Some(dir) = base_dir {
        // Fast path: the name is available lexically (no filesystem round trip,
        // and symlinked directories keep the name the user typed).
        if let Some(name) = dir.file_name() {
            return name.to_string_lossy().into_owned();
        }

        // `file_name()` is `None` for ".", "..", paths ending in ".." and the empty
        // parent of a bare relative file name. Resolve those against the filesystem.
        let lookup = if dir.as_os_str().is_empty() {
            std::path::Path::new(".")
        } else {
            dir
        };
        if let Ok(resolved) = lookup.canonicalize() {
            if let Some(name) = resolved.file_name() {
                return name.to_string_lossy().into_owned();
            }
        }
    }

    // Fallback: nothing better could be determined (e.g. URL, missing path, or "/").
    first_path.clone()
}
90+
5791
pub fn process_entries(args: &Cli) -> Result<Vec<FileEntry>> {
5892
let max_size = args.max_size.expect("max_size should be set from config");
5993
let max_depth = args.max_depth.expect("max_depth should be set from config");
@@ -361,6 +395,7 @@ mod tests {
361395
pdf: None,
362396
traverse_links: false,
363397
link_depth: None,
398+
xml: false,
364399
}
365400
}
366401

src/cli.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ pub struct Cli {
114114
/// Maximum depth to traverse sublinks (default: 1)
115115
#[arg(long)]
116116
pub link_depth: Option<usize>,
117+
118+
/// Output in XML format for better LLM compatibility
119+
#[arg(short = 'x', long)]
120+
pub xml: bool,
117121
}
118122

119123
impl Cli {

src/output.rs

Lines changed: 146 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,37 +15,97 @@ pub struct FileEntry {
1515
pub size: u64,
1616
}
1717

18-
pub fn generate_output(entries: &[FileEntry], format: OutputFormat) -> Result<String> {
18+
pub fn generate_output(
19+
entries: &[FileEntry],
20+
format: OutputFormat,
21+
xml_format: bool,
22+
project_name: Option<String>,
23+
) -> Result<String> {
1924
let mut output = String::new();
2025

26+
if xml_format {
27+
let project_name = project_name.unwrap_or_else(|| "project".to_string());
28+
output.push_str(&format!(
29+
"<context name=\"{}\">\n",
30+
xml_escape(&project_name)
31+
));
32+
}
33+
2134
match format {
2235
OutputFormat::Tree => {
23-
output.push_str("Directory Structure:\n");
36+
if xml_format {
37+
output.push_str("<tree>\n");
38+
} else {
39+
output.push_str("Directory Structure:\n");
40+
}
2441
output.push_str(&generate_tree(entries)?);
42+
if xml_format {
43+
output.push_str("</tree>\n");
44+
}
2545
}
2646
OutputFormat::Files => {
27-
output.push_str("File Contents:\n");
28-
output.push_str(&generate_files(entries)?);
47+
if xml_format {
48+
output.push_str("<files>\n");
49+
} else {
50+
output.push_str("File Contents:\n");
51+
}
52+
output.push_str(&generate_files(entries, xml_format)?);
53+
if xml_format {
54+
output.push_str("</files>\n");
55+
}
2956
}
3057
OutputFormat::Both => {
31-
output.push_str("Directory Structure:\n");
58+
if xml_format {
59+
output.push_str("<tree>\n");
60+
} else {
61+
output.push_str("Directory Structure:\n");
62+
}
3263
output.push_str(&generate_tree(entries)?);
33-
output.push_str("\nFile Contents:\n");
34-
output.push_str(&generate_files(entries)?);
64+
if xml_format {
65+
output.push_str("</tree>\n\n<files>\n");
66+
} else {
67+
output.push_str("\nFile Contents:\n");
68+
}
69+
output.push_str(&generate_files(entries, xml_format)?);
70+
if xml_format {
71+
output.push_str("</files>\n");
72+
}
3573
}
3674
}
3775

3876
// Add summary
39-
output.push_str("\nSummary:\n");
40-
output.push_str(&format!("Total files: {}\n", entries.len()));
41-
output.push_str(&format!(
42-
"Total size: {} bytes\n",
43-
entries.iter().map(|e| e.size).sum::<u64>()
44-
));
77+
if xml_format {
78+
output.push_str("<summary>\n");
79+
output.push_str(&format!("Total files: {}\n", entries.len()));
80+
output.push_str(&format!(
81+
"Total size: {} bytes\n",
82+
entries.iter().map(|e| e.size).sum::<u64>()
83+
));
84+
output.push_str("</summary>\n");
85+
} else {
86+
output.push_str("\nSummary:\n");
87+
output.push_str(&format!("Total files: {}\n", entries.len()));
88+
output.push_str(&format!(
89+
"Total size: {} bytes\n",
90+
entries.iter().map(|e| e.size).sum::<u64>()
91+
));
92+
}
93+
94+
if xml_format {
95+
output.push_str("</context>");
96+
}
4597

4698
Ok(output)
4799
}
48100

101+
/// Replaces the five XML special characters (`&`, `<`, `>`, `"`, `'`) in `text`
/// with their predefined entity references and returns the escaped copy.
///
/// All other characters are copied through unchanged.
fn xml_escape(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());

    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }

    escaped
}
108+
49109
pub fn display_token_counts(token_counter: TokenCounter, entries: &[FileEntry]) -> Result<()> {
50110
let token_count = token_counter.count_files(entries)?;
51111

@@ -117,15 +177,27 @@ fn generate_tree(entries: &[FileEntry]) -> Result<String> {
117177
Ok(output)
118178
}
119179

120-
fn generate_files(entries: &[FileEntry]) -> Result<String> {
180+
fn generate_files(entries: &[FileEntry], xml_format: bool) -> Result<String> {
121181
let mut output = String::new();
122182

123183
for entry in entries {
124-
output.push_str(&format!("\nFile: {}\n", entry.path.display()));
125-
output.push_str(&"=".repeat(48));
126-
output.push('\n');
127-
output.push_str(&entry.content);
128-
output.push('\n');
184+
if xml_format {
185+
output.push_str(&format!(
186+
"<file path=\"{}\">\n",
187+
xml_escape(entry.path.display().to_string().as_str())
188+
));
189+
output.push_str(&"=".repeat(48));
190+
output.push('\n');
191+
output.push_str(&entry.content);
192+
output.push('\n');
193+
output.push_str("</file>\n");
194+
} else {
195+
output.push_str(&format!("\nFile: {}\n", entry.path.display()));
196+
output.push_str(&"=".repeat(48));
197+
output.push('\n');
198+
output.push_str(&entry.content);
199+
output.push('\n');
200+
}
129201
}
130202

131203
Ok(output)
@@ -280,7 +352,7 @@ mod tests {
280352
#[test]
281353
fn test_files_output() {
282354
let entries = create_test_entries();
283-
let files = generate_files(&entries).unwrap();
355+
let files = generate_files(&entries, false).unwrap();
284356
let expected = format!(
285357
"\nFile: {}\n{}\n{}\n\nFile: {}\n{}\n{}\n",
286358
"src/main.rs",
@@ -298,23 +370,73 @@ mod tests {
298370
let entries = create_test_entries();
299371

300372
// Test tree format
301-
let tree_output = generate_output(&entries, OutputFormat::Tree).unwrap();
373+
let tree_output = generate_output(&entries, OutputFormat::Tree, false, None).unwrap();
302374
assert!(tree_output.contains("Directory Structure:"));
303375
assert!(tree_output.contains("src/"));
304376
assert!(tree_output.contains("main.rs"));
305377

306378
// Test files format
307-
let files_output = generate_output(&entries, OutputFormat::Files).unwrap();
379+
let files_output = generate_output(&entries, OutputFormat::Files, false, None).unwrap();
308380
assert!(files_output.contains("File Contents:"));
309381
assert!(files_output.contains("fn main()"));
310382
assert!(files_output.contains("pub fn helper()"));
311383

312384
// Test both format
313-
let both_output = generate_output(&entries, OutputFormat::Both).unwrap();
385+
let both_output = generate_output(&entries, OutputFormat::Both, false, None).unwrap();
314386
assert!(both_output.contains("Directory Structure:"));
315387
assert!(both_output.contains("File Contents:"));
316388
}
317389

390+
#[test]
fn test_xml_output() {
    let entries = create_test_entries();

    // Each case pairs an output format with the section markers that its
    // XML rendering is expected to contain.
    let cases: Vec<(OutputFormat, Vec<&str>)> = vec![
        (
            OutputFormat::Tree,
            vec!["<tree>", "</tree>", "<summary>", "</summary>"],
        ),
        (
            OutputFormat::Files,
            vec![
                "<files>",
                "<file path=\"src/main.rs\">",
                "</file>",
                "</files>",
            ],
        ),
        (
            OutputFormat::Both,
            vec!["<tree>", "</tree>", "<files>", "</files>"],
        ),
    ];

    for (format, markers) in cases {
        let rendered =
            generate_output(&entries, format, true, Some("test_project".to_string())).unwrap();

        // Every XML rendering is wrapped in the named context element.
        assert!(rendered.contains("<context name=\"test_project\">"));
        for marker in markers {
            assert!(rendered.contains(marker));
        }
        assert!(rendered.contains("</context>"));
    }
}
439+
318440
#[test]
319441
fn test_handle_output() {
320442
use tempfile::tempdir;
@@ -345,6 +467,7 @@ mod tests {
345467
traverse_links: false,
346468
link_depth: None,
347469
config_path: false,
470+
xml: false,
348471
};
349472

350473
handle_output(content.clone(), &args).unwrap();

test_project/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/// Prints a fixed marker line to standard output.
pub fn helper() {
    println!("Helper function");
}

test_project/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// Entry point of the sample project: prints a greeting to standard output.
fn main() {
    println!("Hello, world!");
}

0 commit comments

Comments
 (0)