-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Only show up to 50 lines of source code #7578
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,8 +21,81 @@ | |
| //! ``` | ||
|
|
||
| use regex::Regex; | ||
| use std::io::Write; | ||
| use std::sync::LazyLock; | ||
|
|
||
// Code blocks with more lines than this are truncated by `truncate_code_blocks`.
| const MAX_CODE_BLOCK_LINES: usize = 50; | ||
// Number of leading lines that remain visible once a code block has been truncated.
| const TRUNCATED_SHOW_LINES: usize = 20; | ||
|
|
||
| fn truncate_code_blocks(content: &str) -> String { | ||
| let (open_pos, fence) = match (content.find("```"), content.find("~~~")) { | ||
| (Some(a), Some(b)) if a <= b => (a, "```"), | ||
| (Some(a), None) => (a, "```"), | ||
| (None, Some(b)) => (b, "~~~"), | ||
| (Some(_), Some(b)) => (b, "~~~"), | ||
| (None, None) => return content.to_string(), | ||
| }; | ||
|
|
||
| let Some(after_open) = content.get(open_pos + 3..) else { | ||
| return content.to_string(); | ||
| }; | ||
| let Some(newline_pos) = after_open.find('\n') else { | ||
| return content.to_string(); | ||
| }; | ||
| let code_start = open_pos + 3 + newline_pos + 1; | ||
|
|
||
| let Some(code_region) = content.get(code_start..) else { | ||
| return content.to_string(); | ||
| }; | ||
| let close_pattern = format!("\n{}", fence); | ||
| let Some(close_offset) = code_region.find(&close_pattern) else { | ||
|
Comment on lines
+50
to
+51
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The truncation scan only looks for Useful? React with 👍 / 👎. |
||
| return content.to_string(); | ||
| }; | ||
|
|
||
| let Some(code_content) = code_region.get(..close_offset) else { | ||
| return content.to_string(); | ||
| }; | ||
| let lines: Vec<&str> = code_content.lines().collect(); | ||
|
|
||
| if lines.len() <= MAX_CODE_BLOCK_LINES { | ||
| return content.to_string(); | ||
|
Comment on lines
+60
to
+61
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
| } | ||
|
|
||
| let truncated: String = lines | ||
| .iter() | ||
| .take(TRUNCATED_SHOW_LINES) | ||
| .copied() | ||
| .collect::<Vec<_>>() | ||
| .join("\n"); | ||
| let remaining = lines.len() - TRUNCATED_SHOW_LINES; | ||
|
|
||
| let file_msg = save_to_temp_file(code_content) | ||
| .map(|p| format!(" → {}", p)) | ||
| .unwrap_or_default(); | ||
|
|
||
| let close_pos = code_start + close_offset + 1; // +1 to include the \n | ||
| let prefix = content.get(..code_start).unwrap_or(""); | ||
| let suffix = content.get(close_pos..).unwrap_or(""); | ||
| format!( | ||
| "{}{}\n... ({} more lines{})\n{}", | ||
| prefix, truncated, remaining, file_msg, suffix | ||
| ) | ||
| } | ||
|
|
||
| fn save_to_temp_file(content: &str) -> Option<String> { | ||
| let mut file = tempfile::Builder::new() | ||
| .prefix("goose-") | ||
| .suffix(".txt") | ||
| .tempfile() | ||
| .ok()?; | ||
|
|
||
| file.write_all(content.as_bytes()).ok()?; | ||
|
|
||
| // Keep the file (don't delete on drop) and get the path | ||
| let (_, path) = file.keep().ok()?; | ||
| Some(path.display().to_string()) | ||
| } | ||
|
|
||
| /// Regex that tokenizes markdown inline elements. | ||
| /// Order matters: longer/more-specific patterns first. | ||
| static INLINE_TOKEN_RE: LazyLock<Regex> = LazyLock::new(|| { | ||
|
|
@@ -52,7 +125,8 @@ | |
| /// A streaming markdown buffer that tracks open constructs. | ||
| /// | ||
| /// Accumulates chunks and returns content that is safe to render, | ||
| /// holding back any incomplete markdown constructs. | ||
| /// holding back any incomplete markdown constructs. Large code blocks | ||
| /// are automatically truncated with full content saved to a temp file. | ||
| #[derive(Default)] | ||
| pub struct MarkdownBuffer { | ||
| buffer: String, | ||
|
|
@@ -106,7 +180,8 @@ | |
| /// Add a chunk of markdown text to the buffer. | ||
| /// | ||
| /// Returns any content that is safe to render, or None if the buffer | ||
| /// contains only incomplete constructs. | ||
| /// contains only incomplete constructs. Large code blocks are automatically | ||
| /// truncated with full content saved to a temp file. | ||
| pub fn push(&mut self, chunk: &str) -> Option<String> { | ||
| self.buffer.push_str(chunk); | ||
| let safe_end = self.find_safe_end(); | ||
|
|
@@ -118,7 +193,7 @@ | |
| // - The regex tokenizer operates on &str which guarantees UTF-8 | ||
| let to_render = self.buffer[..safe_end].to_string(); | ||
| self.buffer = self.buffer[safe_end..].to_string(); | ||
| Some(to_render) | ||
| Some(truncate_code_blocks(&to_render)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The truncation logic is only applied in Useful? React with 👍 / 👎. |
||
| } else { | ||
| None | ||
| } | ||
|
|
@@ -590,7 +665,8 @@ | |
| &["Start of ", "`code"] | ||
| ; "unclosed inline code flushes" | ||
| )] | ||
| fn test_incomplete_constructs(chunks: &[&str], expected: &[&str]) { | ||
| assert_eq!(stream(chunks), expected); | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`truncate_code_blocks` searches for the end fence using "\n```" / "\n~~~" regardless of how long the opening fence actually is, so content opened with longer fences (for example, ````md blocks that embed inner ``` snippets) is treated as closed at the first inner triple fence. In that scenario, `lines.len()` is computed on only a prefix of the real block, and oversized code blocks are left untruncated even though `check_code_fence` correctly supports longer fences. Useful? React with 👍 / 👎.