feat(gateway): add attachment flows, v2 skill install coverage, and e2e stabilization #2385
feat(gateway): add attachment flows, v2 skill install coverage, and e2e stabilization #2385 — ilblackdragon wants to merge 7 commits into staging from
Conversation
There was a problem hiding this comment.
Code Review
This pull request implements a robust file attachment system that supports various document types, persists them to project-local storage, and indexes them for the engine. It also enhances skill management by supporting multi-file bundles from ZIPs and GitHub, and introduces a unified slash-command autocomplete. Feedback from the review suggests refining the slash command regex to prevent accidental matches with file paths and ensuring consistent MIME type handling and XML decoding between the frontend and backend. Specifically, support for docx and xlsx should be added to the frontend inference logic, and MIME mapping should be unified to prevent inconsistencies in file naming.
| rewritten = goal | ||
| replacements = [] | ||
|
|
||
| for match in re.finditer(r'(^|[\s"\(])/(?P<name>[A-Za-z0-9._-]+)', goal): |
There was a problem hiding this comment.
The current regex for slash commands is too broad and will match path segments (e.g., /etc in /etc/passwd), leading to incorrect "missing skill" system messages. Following the principle of using word boundaries to avoid false positives, adding a lookahead for whitespace, quotes, or parentheses ensures it only matches intended command prefixes.
| for match in re.finditer(r'(^|[\s"\(])/(?P<name>[A-Za-z0-9._-]+)', goal): | |
| for match in re.finditer(r'(^|[\s"\(])/(?P<name>[A-Za-z0-9._-]+)(?=$|[\s"\(])', goal): |
References
- Use word boundary validation to avoid false positives where a name is a substring of another word.
| function inferAttachmentMimeType(file) { | ||
| if (file.type) return file.type; | ||
| const name = (file.name || '').toLowerCase(); | ||
| if (name.endsWith('.pdf')) return 'application/pdf'; | ||
| if (name.endsWith('.pptx')) return 'application/vnd.openxmlformats-officedocument.presentationml.presentation'; | ||
| if (name.endsWith('.ppt')) return 'application/vnd.ms-powerpoint'; | ||
| if (name.endsWith('.md')) return 'text/markdown'; | ||
| if (name.endsWith('.csv')) return 'text/csv'; | ||
| if (name.endsWith('.json')) return 'application/json'; | ||
| if (name.endsWith('.xml')) return 'application/xml'; | ||
| if (name.endsWith('.txt')) return 'text/plain'; | ||
| return 'application/octet-stream'; | ||
| } |
There was a problem hiding this comment.
This function is missing docx and xlsx support, which are handled in the backend. It should be updated to include these common document types for consistent MIME inference.
// Infer a MIME type for an attachment.
// Prefers the browser-supplied file.type when present; otherwise falls back
// to matching the (lowercased) file-name extension. Includes docx/xlsx so the
// frontend inference stays consistent with the backend's MIME handling.
// Unknown or missing extensions map to application/octet-stream.
function inferAttachmentMimeType(file) {
  if (file.type) return file.type;
  const name = (file.name || '').toLowerCase();
  if (name.endsWith('.pdf')) return 'application/pdf';
  if (name.endsWith('.pptx')) return 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
  if (name.endsWith('.ppt')) return 'application/vnd.ms-powerpoint';
  if (name.endsWith('.docx')) return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
  if (name.endsWith('.xlsx')) return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
  if (name.endsWith('.md')) return 'text/markdown';
  if (name.endsWith('.csv')) return 'text/csv';
  if (name.endsWith('.json')) return 'application/json';
  if (name.endsWith('.xml')) return 'application/xml';
  if (name.endsWith('.txt')) return 'text/plain';
  return 'application/octet-stream';
}
| return text | ||
| .replace(/&quot;/g, '"') | ||
| .replace(/&lt;/g, '<') | ||
| .replace(/&gt;/g, '>') | ||
| .replace(/&amp;/g, '&'); | ||
| } |
There was a problem hiding this comment.
The decodeXmlText function is missing the &#39; (apostrophe) entity, which is used by the backend's escape_xml_attr helper when serializing attachment attributes.
| function decodeXmlText(text) { | |
| return text | |
| .replace(/&quot;/g, '"') | |
| .replace(/&lt;/g, '<') | |
| .replace(/&gt;/g, '>') | |
| .replace(/&amp;/g, '&'); | |
| } | |
| function decodeXmlText(text) { | |
| return text | |
| .replace(/&quot;/g, '"') | |
| .replace(/&#39;/g, "'") | |
| .replace(/&lt;/g, '<') | |
| .replace(/&gt;/g, '>') | |
| .replace(/&amp;/g, '&'); | |
| } |
| fn fallback_attachment_filename(index: usize, mime_type: &str) -> String { | ||
| let ext = match mime_type.split(';').next().unwrap_or(mime_type).trim() { | ||
| "image/png" => "png", | ||
| "image/jpeg" => "jpg", | ||
| "image/webp" => "webp", | ||
| "image/gif" => "gif", | ||
| "application/pdf" => "pdf", | ||
| "text/plain" => "txt", | ||
| "audio/mpeg" => "mp3", | ||
| "audio/wav" => "wav", | ||
| "audio/x-wav" => "wav", | ||
| "audio/ogg" => "ogg", | ||
| "application/vnd.openxmlformats-officedocument.presentationml.presentation" => "pptx", | ||
| "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => "docx", | ||
| "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => "xlsx", | ||
| _ => "bin", | ||
| }; | ||
| format!("attachment-{}.{}", index + 1, ext) | ||
| } |
There was a problem hiding this comment.
The MIME to extension mapping here is inconsistent with mime_to_ext in server.rs. For example, it's missing md, csv, json, and xml. As per the rule to centralize logic for consistent behavior, these mappings should be unified to ensure consistent file naming across different persistence paths.
/// Build a deterministic filename for an attachment that arrived without one.
///
/// The extension is derived from the MIME type; parameters such as
/// `; charset=utf-8` are stripped before matching. Unknown types fall back
/// to `bin`, except unknown `image/*` subtypes which default to `jpg` so
/// they stay viewable. `index` is zero-based; the generated name is
/// one-based (`attachment-1.pdf`, `attachment-2.txt`, ...).
fn fallback_attachment_filename(index: usize, mime_type: &str) -> String {
    // Drop any MIME parameters and surrounding whitespace before matching.
    let ext = match mime_type.split(';').next().unwrap_or(mime_type).trim() {
        // Images
        "image/png" => "png",
        "image/jpeg" | "image/jpg" => "jpg",
        "image/webp" => "webp",
        "image/gif" => "gif",
        "image/svg+xml" => "svg",
        // Text and documents
        "application/pdf" => "pdf",
        "text/plain" => "txt",
        "text/markdown" => "md",
        "text/csv" => "csv",
        "application/json" => "json",
        "application/xml" | "text/xml" => "xml",
        // Audio
        "audio/mpeg" => "mp3",
        "audio/wav" | "audio/x-wav" => "wav",
        "audio/ogg" => "ogg",
        // Office formats
        "application/vnd.openxmlformats-officedocument.presentationml.presentation" => "pptx",
        "application/vnd.ms-powerpoint" => "ppt",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => "docx",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => "xlsx",
        // Unknown image subtype: keep it viewable as jpg.
        _ if mime_type.starts_with("image/") => "jpg",
        _ => "bin",
    };
    format!("attachment-{}.{}", index + 1, ext)
}
- Centralize parsing logic into a single function to ensure consistent behavior across all call sites.
| "image/gif" => "gif", | ||
| "image/webp" => "webp", | ||
| "image/svg+xml" => "svg", | ||
| _ => "jpg", | ||
| "application/pdf" => "pdf", | ||
| "text/plain" => "txt", | ||
| "text/markdown" => "md", | ||
| "text/csv" => "csv", | ||
| "application/json" => "json", | ||
| "application/xml" | "text/xml" => "xml", | ||
| "application/vnd.openxmlformats-officedocument.presentationml.presentation" => "pptx", | ||
| "application/vnd.ms-powerpoint" => "ppt", | ||
| _ if mime.starts_with("image/") => "jpg", | ||
| _ => "bin", | ||
| } | ||
| } |
There was a problem hiding this comment.
This function doesn't handle MIME parameters (e.g., text/plain; charset=utf-8) which will cause it to return bin for common types. It's also missing several types like docx, xlsx, and audio formats. Centralizing this logic ensures consistent behavior across the repository.
/// Map a MIME type to a file extension for persisted attachments.
///
/// MIME parameters (e.g. `text/plain; charset=utf-8`) are stripped before
/// matching so parameterized types resolve to their base extension.
/// Unknown types fall back to `bin`, except unknown `image/*` subtypes
/// which default to `jpg`. The mapping mirrors
/// `fallback_attachment_filename` so file naming stays consistent across
/// persistence paths.
fn mime_to_ext(mime: &str) -> &str {
    // Keep only the media type itself; drop parameters and whitespace.
    let base_mime = mime.split(';').next().unwrap_or(mime).trim();
    match base_mime {
        // Images
        "image/png" => "png",
        "image/jpeg" | "image/jpg" => "jpg",
        "image/gif" => "gif",
        "image/webp" => "webp",
        "image/svg+xml" => "svg",
        // Text and documents
        "application/pdf" => "pdf",
        "text/plain" => "txt",
        "text/markdown" => "md",
        "text/csv" => "csv",
        "application/json" => "json",
        "application/xml" | "text/xml" => "xml",
        // Audio
        "audio/mpeg" => "mp3",
        "audio/wav" | "audio/x-wav" => "wav",
        "audio/ogg" => "ogg",
        // Office formats
        "application/vnd.openxmlformats-officedocument.presentationml.presentation" => "pptx",
        "application/vnd.ms-powerpoint" => "ppt",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => "docx",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => "xlsx",
        // Unknown image subtype: keep it viewable as jpg.
        _ if base_mime.starts_with("image/") => "jpg",
        _ => "bin",
    }
}
- Centralize parsing logic into a single function to ensure consistent behavior across all call sites.
serrrfirat
left a comment
There was a problem hiding this comment.
Security and code quality review -- 12 findings (2 Critical, 4 High, 6 Medium). Inline comments below.
| repo.owner, repo.repo, repo.branch | ||
| )) | ||
| .map_err(|e| SkillFetchError::from_message(format!("Invalid GitHub archive URL: {e}")))?; | ||
| let bytes = fetch_url_bytes(&archive_url).await?; |
There was a problem hiding this comment.
Critical Severity — SSRF bypass via GitHub URL rewriting
owner and repo from parse_github_repo_ref are interpolated into URLs for codeload.github.com, api.github.com, and raw.githubusercontent.com. The validate_fetch_url SSRF check runs on the original URL but NOT on these reconstructed URLs — they bypass SSRF validation entirely.
Also, owner/repo are not validated against path-special characters (/, .., %2F). A crafted URL could target unexpected endpoints on these hosts.
Suggested fix: Run validate_fetch_url on every derived URL before fetching. Validate owner/repo match [A-Za-z0-9._-]+.
| validate_fetch_url(url).map_err(|e| SkillFetchError::from_message(e.to_string()))?; | ||
|
|
||
| if let Some(raw_url) = github_blob_raw_url(&parsed) { | ||
| let bytes = fetch_url_bytes(&raw_url).await?; |
There was a problem hiding this comment.
Critical Severity — SSRF bypass via raw.githubusercontent.com rewrite
github_blob_raw_url rewrites a github.com/blob/ URL to raw.githubusercontent.com without calling validate_fetch_url on the new URL. The original SSRF allowlist check ran against github.com, not raw.githubusercontent.com.
Suggested fix: Call validate_fetch_url on the constructed raw_url before passing to fetch_url_bytes.
| .unwrap_or_else(|| fallback_attachment_filename(index, &attachment.mime_type)); | ||
| format!( | ||
| "{}/{}/{}/{}/{}-{}", | ||
| PROJECT_ATTACHMENT_DIR, owner, project_id, date, message.id, filename |
There was a problem hiding this comment.
High Severity — Path traversal via unsanitized message.id
attachment_project_relative_path applies sanitize_attachment_segment to user_id and filename, but message.id is inserted into the path without sanitization. While currently a UUID (safe by construction), IncomingMessage::with_id() or field mutation could allow ../ injection, writing files outside .ironclaw/attachments/.
Suggested fix: Apply sanitize_attachment_segment to message.id.to_string(), or validate it is a valid UUID. Use Path::components() validation like validate_install_relative_path.
| } | ||
| Err(e) => { | ||
| let error_msg = format!("Tool '{}' failed: {}", lookup_name, e); | ||
| if error_msg.contains("Extension not installed:") |
There was a problem hiding this comment.
High Severity — Privilege escalation via error message string matching
Retry-with-ExplicitActivate is gated on error_msg.contains("Extension not installed:"). This is fragile (breaks if message text changes) and exploitable (adversarial error output containing this string triggers auto-installation).
Suggested fix: Introduce a typed error variant ToolError::ExtensionNotInstalled { name } and match on that instead of string content.
| /// Load persisted install metadata for a skill directory, if present. | ||
| pub fn read_install_metadata(path: &Path) -> Option<InstalledSkillMetadata> { | ||
| let meta_path = path.join(INSTALL_METADATA_FILE); | ||
| let bytes = std::fs::read(&meta_path).ok()?; |
There was a problem hiding this comment.
High Severity — Blocking I/O in async context
read_install_metadata uses synchronous std::fs::read but is called from async code paths (web handlers, skill migration). Blocks the tokio runtime thread, causing latency spikes or deadlocks under load.
Suggested fix: Make async with tokio::fs::read, or use tokio::task::spawn_blocking at call sites.
| return; | ||
| } | ||
| if (_sendCooldown) return; | ||
| if (pendingAttachmentReads.length > 0) { |
There was a problem hiding this comment.
Medium Severity — Race condition in async sendMessage
Cooldown check (if (_sendCooldown) return) happens at line 1162 before await Promise.all(pendingAttachmentReads) at line 1164. However, two rapid Enter presses can both pass cooldown before the first send completes, because _sendCooldown is only set further down. The await creates a yield point that widens the race window.
Suggested fix: Set cooldown flag or disable send button before the await, not after.
| continue; | ||
| } | ||
|
|
||
| attachment.local_path = Some(relative_path.clone()); |
There was a problem hiding this comment.
Medium Severity — Attachment data not freed after disk write
After tokio::fs::write at line 218, attachment.data (up to 5MB each) is not cleared. Binary content stays in memory through entire request including LLM calls.
Suggested fix: Add attachment.data.clear() after successful write (or after setting local_path).
|
|
||
| Some( | ||
| match ext_mgr | ||
| match match ext_mgr |
There was a problem hiding this comment.
Medium Severity — Nested match-match with implicit auto-install
match match ext_mgr.ensure_extension_ready(...) nests two matches. The outer Err(NotInstalled) arm retries with ExplicitActivate (auto-install). This implicit privilege escalation is nearly invisible in the nested syntax.
Suggested fix: Extract into a helper method (like the new prepare_tool_for_execution) to make the retry-with-escalation logic auditable and consistent.
| } | ||
| }; | ||
|
|
||
| if payload.skill_md.len() as u64 > ironclaw_skills::MAX_PROMPT_FILE_SIZE { |
There was a problem hiding this comment.
Medium Severity — Size check bypassed for GitHub skill payloads
MAX_PROMPT_FILE_SIZE is only checked in the generic path here. GitHub blob and repo paths (github_blob_raw_url returns at line ~1413, fetch_github_repo_payload returns at line ~1400) both return early without checking.
A malicious repo could host an oversized SKILL.md that bypasses this validation.
Suggested fix: Apply MAX_PROMPT_FILE_SIZE validation in all return paths, or move the check into a shared post-processing step before returning from fetch_skill_payload.
| project_id: ironclaw_engine::ProjectId, | ||
| attachments: &mut [crate::channels::IncomingAttachment], | ||
| ) -> Vec<AttachmentIndexNote> { | ||
| let cwd = match std::env::current_dir() { |
There was a problem hiding this comment.
Medium Severity — Production code relies on process-global current_dir
persist_project_attachments uses std::env::current_dir(). The test at line ~4277 uses set_current_dir (process-global mutation). Other concurrent tests could be affected despite CWD_TEST_LOCK.
Suggested fix: Pass project root as an explicit parameter instead of relying on current_dir().
Summary
- Persist attachments under `.ironclaw/attachments/...` storage and verify attachment handling across gateway, Telegram, and WhatsApp paths
- Verify `/<skill>` routing behaves correctly
- Stabilize e2e against staging by fixing warning noise, flaky reconnect/history assumptions, writable-thread handling, approval-flow isolation, Telegram stale-update handling, and initial auth-page bootstrap races
- `cargo clippy --all --benches --tests --examples --no-default-features --features libsql -- -D warnings`
- `cargo fmt --all -- --check`
- `tests/e2e/.venv/bin/pytest tests/e2e/scenarios/ -q -s --maxfail=1 -W error::DeprecationWarning -W error::pytest.PytestUnraisableExceptionWarning`