Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ A blazingly fast tool for peeking at codebases. Perfect for loading your codebas
- 🔗 Web content processing with Markdown conversion
- 📦 Git repository support
- 🌐 URL traversal with configurable depth
- 🏷️ XML output format for better LLM compatibility

## Installation

Expand Down Expand Up @@ -115,6 +116,9 @@ glimpse --config_path

# Initialize a .glimpse config file in the current directory
glimpse --config

# Output in XML format for better LLM compatibility
glimpse -x /path/to/project
```

## CLI Options
Expand Down Expand Up @@ -146,6 +150,7 @@ Options:
--traverse-links Traverse links when processing URLs
--link-depth <DEPTH> Maximum depth to traverse links (default: 1)
--pdf <PATH> Save output as PDF
-x, --xml Output in XML format for better LLM compatibility
-h, --help Print help
-V, --version Print version
```
Expand Down Expand Up @@ -179,6 +184,60 @@ default_excludes = [
]
```

## XML Output Format

Glimpse supports an XML output format designed for better compatibility with Large Language Models (LLMs) such as Claude and GPT. When you pass the `-x` or `--xml` flag, the output is structured with clear XML tags that help LLMs understand the context and structure of your codebase.

### XML Structure

The XML output wraps all content in a `<context>` tag with the project name:

```xml
<context name="my_project">
<tree>
└── src/
    └── main.rs
</tree>

<files>
<file path="src/main.rs">
================================================
fn main() {
println!("Hello, World!");
}
</file>
</files>

<summary>
Total files: 1
Total size: 45 bytes
</summary>
</context>
```

### Benefits for LLM Usage

- **Clear Context Boundaries**: The `<context>` wrapper helps LLMs understand where your codebase begins and ends
- **Structured Information**: Separate sections for directory tree, file contents, and summary
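- **Escaped Attributes**: Project names and file paths are XML-escaped inside tag attributes so they cannot break the surrounding tags (the directory tree and file contents are emitted verbatim)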
- **Project Identification**: Automatic project name detection for better context

### Usage Examples

```bash
# Basic XML output
glimpse -x /path/to/project

# XML output with file save
glimpse -x -f project.xml /path/to/project

# XML output to stdout
glimpse -x --print /path/to/project

# XML output with specific includes
glimpse -x -i "*.rs,*.py" /path/to/project
```

## Token Counting

Glimpse supports two tokenizer backends:
Expand Down
37 changes: 36 additions & 1 deletion src/analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,15 @@ pub fn process_directory(args: &Cli) -> Result<()> {
fs::write(pdf_path, pdf_data)?;
println!("PDF output written to: {}", pdf_path.display());
} else {
// Determine project name for XML output
let project_name = if args.xml {
Some(determine_project_name(&args.paths))
} else {
None
};

// Handle output (print/copy/save)
let output = generate_output(&entries, output_format)?;
let output = generate_output(&entries, output_format, args.xml, project_name)?;
handle_output(output, args)?;
}

Expand All @@ -54,6 +61,33 @@ pub fn process_directory(args: &Cli) -> Result<()> {
Ok(())
}

/// Derives a display name for the project from the first entry of `paths`.
///
/// Resolution order:
/// 1. If the first path is a directory, its final component is used.
/// 2. If it is a file, the final component of its parent directory is used.
/// 3. If neither step yields a name (for example `.`, `..`, or a bare relative
///    file name such as `main.rs`, whose parent is the empty path), the path is
///    canonicalized and steps 1-2 are retried on the absolute form.
/// 4. Otherwise the raw path string is returned unchanged; inputs that do not
///    exist on disk end up here.
///
/// Returns `"project"` when `paths` is empty.
fn determine_project_name(paths: &[String]) -> String {
    // Final component of `path` when it is a directory, or of its parent when
    // it is a file; `None` when the path is neither or has no final component.
    fn name_of(path: &std::path::Path) -> Option<String> {
        let anchor = if path.is_dir() {
            path
        } else if path.is_file() {
            path.parent()?
        } else {
            return None;
        };
        anchor
            .file_name()
            .map(|name| name.to_string_lossy().into_owned())
    }

    let first_path = match paths.first() {
        Some(first) => first,
        None => return "project".to_string(),
    };
    let path = std::path::Path::new(first_path);

    // Try the path exactly as given first so existing results are unchanged;
    // only fall back to the canonical form when the raw path has no usable
    // final component.
    name_of(path)
        .or_else(|| path.canonicalize().ok().and_then(|abs| name_of(&abs)))
        .unwrap_or_else(|| first_path.clone())
}

pub fn process_entries(args: &Cli) -> Result<Vec<FileEntry>> {
let max_size = args.max_size.expect("max_size should be set from config");
let max_depth = args.max_depth.expect("max_depth should be set from config");
Expand Down Expand Up @@ -361,6 +395,7 @@ mod tests {
pdf: None,
traverse_links: false,
link_depth: None,
xml: false,
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ pub struct Cli {
/// Maximum depth to traverse sublinks (default: 1)
#[arg(long)]
pub link_depth: Option<usize>,

/// Output in XML format for better LLM compatibility
#[arg(short = 'x', long)]
pub xml: bool,
}

impl Cli {
Expand Down
169 changes: 146 additions & 23 deletions src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,37 +15,97 @@ pub struct FileEntry {
pub size: u64,
}

pub fn generate_output(entries: &[FileEntry], format: OutputFormat) -> Result<String> {
pub fn generate_output(
entries: &[FileEntry],
format: OutputFormat,
xml_format: bool,
project_name: Option<String>,
) -> Result<String> {
let mut output = String::new();

if xml_format {
let project_name = project_name.unwrap_or_else(|| "project".to_string());
output.push_str(&format!(
"<context name=\"{}\">\n",
xml_escape(&project_name)
));
}

match format {
OutputFormat::Tree => {
output.push_str("Directory Structure:\n");
if xml_format {
output.push_str("<tree>\n");
} else {
output.push_str("Directory Structure:\n");
}
output.push_str(&generate_tree(entries)?);
if xml_format {
output.push_str("</tree>\n");
}
}
OutputFormat::Files => {
output.push_str("File Contents:\n");
output.push_str(&generate_files(entries)?);
if xml_format {
output.push_str("<files>\n");
} else {
output.push_str("File Contents:\n");
}
output.push_str(&generate_files(entries, xml_format)?);
if xml_format {
output.push_str("</files>\n");
}
}
OutputFormat::Both => {
output.push_str("Directory Structure:\n");
if xml_format {
output.push_str("<tree>\n");
} else {
output.push_str("Directory Structure:\n");
}
output.push_str(&generate_tree(entries)?);
output.push_str("\nFile Contents:\n");
output.push_str(&generate_files(entries)?);
if xml_format {
output.push_str("</tree>\n\n<files>\n");
} else {
output.push_str("\nFile Contents:\n");
}
output.push_str(&generate_files(entries, xml_format)?);
if xml_format {
output.push_str("</files>\n");
}
}
}

// Add summary
output.push_str("\nSummary:\n");
output.push_str(&format!("Total files: {}\n", entries.len()));
output.push_str(&format!(
"Total size: {} bytes\n",
entries.iter().map(|e| e.size).sum::<u64>()
));
if xml_format {
output.push_str("<summary>\n");
output.push_str(&format!("Total files: {}\n", entries.len()));
output.push_str(&format!(
"Total size: {} bytes\n",
entries.iter().map(|e| e.size).sum::<u64>()
));
output.push_str("</summary>\n");
} else {
output.push_str("\nSummary:\n");
output.push_str(&format!("Total files: {}\n", entries.len()));
output.push_str(&format!(
"Total size: {} bytes\n",
entries.iter().map(|e| e.size).sum::<u64>()
));
}

if xml_format {
output.push_str("</context>");
}

Ok(output)
}

/// Returns a copy of `text` in which the five XML-reserved characters
/// (`&`, `<`, `>`, `"`, `'`) are replaced by their predefined entity references.
fn xml_escape(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}

pub fn display_token_counts(token_counter: TokenCounter, entries: &[FileEntry]) -> Result<()> {
let token_count = token_counter.count_files(entries)?;

Expand Down Expand Up @@ -117,15 +177,27 @@ fn generate_tree(entries: &[FileEntry]) -> Result<String> {
Ok(output)
}

fn generate_files(entries: &[FileEntry]) -> Result<String> {
fn generate_files(entries: &[FileEntry], xml_format: bool) -> Result<String> {
let mut output = String::new();

for entry in entries {
output.push_str(&format!("\nFile: {}\n", entry.path.display()));
output.push_str(&"=".repeat(48));
output.push('\n');
output.push_str(&entry.content);
output.push('\n');
if xml_format {
output.push_str(&format!(
"<file path=\"{}\">\n",
xml_escape(entry.path.display().to_string().as_str())
));
output.push_str(&"=".repeat(48));
output.push('\n');
output.push_str(&entry.content);
output.push('\n');
output.push_str("</file>\n");
} else {
output.push_str(&format!("\nFile: {}\n", entry.path.display()));
output.push_str(&"=".repeat(48));
output.push('\n');
output.push_str(&entry.content);
output.push('\n');
}
}

Ok(output)
Expand Down Expand Up @@ -280,7 +352,7 @@ mod tests {
#[test]
fn test_files_output() {
let entries = create_test_entries();
let files = generate_files(&entries).unwrap();
let files = generate_files(&entries, false).unwrap();
let expected = format!(
"\nFile: {}\n{}\n{}\n\nFile: {}\n{}\n{}\n",
"src/main.rs",
Expand All @@ -298,23 +370,73 @@ mod tests {
let entries = create_test_entries();

// Test tree format
let tree_output = generate_output(&entries, OutputFormat::Tree).unwrap();
let tree_output = generate_output(&entries, OutputFormat::Tree, false, None).unwrap();
assert!(tree_output.contains("Directory Structure:"));
assert!(tree_output.contains("src/"));
assert!(tree_output.contains("main.rs"));

// Test files format
let files_output = generate_output(&entries, OutputFormat::Files).unwrap();
let files_output = generate_output(&entries, OutputFormat::Files, false, None).unwrap();
assert!(files_output.contains("File Contents:"));
assert!(files_output.contains("fn main()"));
assert!(files_output.contains("pub fn helper()"));

// Test both format
let both_output = generate_output(&entries, OutputFormat::Both).unwrap();
let both_output = generate_output(&entries, OutputFormat::Both, false, None).unwrap();
assert!(both_output.contains("Directory Structure:"));
assert!(both_output.contains("File Contents:"));
}

/// Checks that XML mode emits the expected wrapper and section tags for each
/// of the three output formats (tree only, files only, both).
#[test]
fn test_xml_output() {
    // Asserts that every fragment occurs somewhere in the rendered output.
    fn assert_contains_all(rendered: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                rendered.contains(fragment),
                "XML output is missing `{}`",
                fragment
            );
        }
    }

    let entries = create_test_entries();
    let project = || Some("test_project".to_string());

    // Tree only: context wrapper, tree section and summary.
    let tree_only = generate_output(&entries, OutputFormat::Tree, true, project()).unwrap();
    assert_contains_all(
        &tree_only,
        &[
            "<context name=\"test_project\">",
            "<tree>",
            "</tree>",
            "<summary>",
            "</summary>",
            "</context>",
        ],
    );

    // Files only: context wrapper and per-file elements inside <files>.
    let files_only = generate_output(&entries, OutputFormat::Files, true, project()).unwrap();
    assert_contains_all(
        &files_only,
        &[
            "<context name=\"test_project\">",
            "<files>",
            "<file path=\"src/main.rs\">",
            "</file>",
            "</files>",
            "</context>",
        ],
    );

    // Both: tree and files sections inside a single context wrapper.
    let combined = generate_output(&entries, OutputFormat::Both, true, project()).unwrap();
    assert_contains_all(
        &combined,
        &[
            "<context name=\"test_project\">",
            "<tree>",
            "</tree>",
            "<files>",
            "</files>",
            "</context>",
        ],
    );
}

#[test]
fn test_handle_output() {
use tempfile::tempdir;
Expand Down Expand Up @@ -345,6 +467,7 @@ mod tests {
traverse_links: false,
link_depth: None,
config_path: false,
xml: false,
};

handle_output(content.clone(), &args).unwrap();
Expand Down
1 change: 1 addition & 0 deletions test_project/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub fn helper() { println!("Helper function"); }
1 change: 1 addition & 0 deletions test_project/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fn main() { println!("Hello, world!"); }