Skip to content

Commit 8831890

Browse files
authored
feat: resize and compress images before base64 encoding (#210)
* feat: resize and compress images before base64 encoding Follow OpenClaw's approach to prevent large image payloads from exceeding JSON-RPC transport limits (Internal Error -32603). Changes: - Add image crate dependency (jpeg, png, gif, webp) - Resize images so longest side <= 1200px (Lanczos3) - Re-encode as JPEG at quality 75 (~200-400KB after base64) - GIFs pass through unchanged to preserve animation - Fallback to original bytes if resize fails Fixes #209 * test: add unit tests for image resize and compression Tests cover: - Large image resized to max 1200px - Small image keeps original dimensions - Landscape/portrait aspect ratio preserved - Compressed output smaller than original - GIF passes through unchanged - Invalid data returns error * fix: preserve aspect ratio on resize + add fallback size check Address review feedback from @the3mi: - 🔴 Fix resize() to calculate proportional dimensions instead of forcing 1200x1200 (was distorting images) - 🟡 Add 1MB size check on fallback path when resize fails - Fix portrait/landscape test assertions to match correct aspect ratios * fix: restore post-download size check + use structured logging Address minor review feedback: - Restore defense-in-depth bytes.len() check after download - Use tracing structured fields (url = %url, error = %e) for consistency with codebase style --------- Co-authored-by: chaodu-agent <chaodu-agent@users.noreply.github.com>
1 parent 516bafa commit 8831890

2 files changed

Lines changed: 166 additions & 38 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ anyhow = "1"
1717
rand = "0.8"
1818
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
1919
base64 = "0.22"
20+
image = { version = "0.25", default-features = false, features = ["jpeg", "png", "gif", "webp"] }

src/discord.rs

Lines changed: 165 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use crate::format;
55
use crate::reactions::StatusReactionController;
66
use base64::engine::general_purpose::STANDARD as BASE64;
77
use base64::Engine;
8+
use image::ImageReader;
9+
use std::io::Cursor;
810
use std::sync::LazyLock;
911
use serenity::async_trait;
1012
use serenity::model::channel::{Message, ReactionType};
@@ -233,14 +235,20 @@ impl EventHandler for Handler {
233235
}
234236
}
235237

236-
/// Download a Discord image attachment and encode it as an ACP image content block.
237-
///
238-
/// Discord attachment URLs are temporary and expire, so we must download
239-
/// and encode the image data immediately. The ACP ImageContent schema
240-
/// requires `{ data: base64_string, mimeType: "image/..." }`.
238+
/// Maximum dimension (width or height) for resized images.
239+
/// Matches OpenClaw's DEFAULT_IMAGE_MAX_DIMENSION_PX.
240+
const IMAGE_MAX_DIMENSION_PX: u32 = 1200;
241+
242+
/// JPEG quality for compressed output (OpenClaw uses progressive 85→35;
243+
/// we use a single fixed pass at 75, which is a good balance of quality vs size).
244+
const IMAGE_JPEG_QUALITY: u8 = 75;
245+
246+
/// Download a Discord image attachment, resize/compress it, then base64-encode
247+
/// as an ACP image content block.
241248
///
242-
/// Security: rejects non-image attachments (by content-type or extension)
243-
/// and files larger than 10MB to prevent OOM/abuse.
249+
/// Large images are resized so the longest side is at most 1200px and
250+
/// re-encoded as JPEG at quality 75. This keeps the base64 payload well
251+
/// under typical JSON-RPC transport limits (~200-400KB after encoding).
244252
async fn download_and_encode_image(attachment: &serenity::model::channel::Attachment) -> Option<ContentBlock> {
245253
const MAX_SIZE: u64 = 10 * 1024 * 1024; // 10 MB
246254

@@ -267,69 +275,104 @@ async fn download_and_encode_image(attachment: &serenity::model::channel::Attach
267275
})
268276
});
269277

270-
// Validate that it's actually an image
271278
let Some(mime) = media_type else {
272-
debug!(filename = %attachment.filename, "skipping non-image attachment (no matching content-type or extension)");
279+
debug!(filename = %attachment.filename, "skipping non-image attachment");
273280
return None;
274281
};
275-
// Strip MIME type parameters (e.g. "image/jpeg; charset=utf-8" → "image/jpeg")
276-
// Downstream LLM APIs (Claude, OpenAI, Gemini) reject MIME types with parameters
277282
let mime = mime.split(';').next().unwrap_or(mime).trim();
278283
if !mime.starts_with("image/") {
279284
debug!(filename = %attachment.filename, mime = %mime, "skipping non-image attachment");
280285
return None;
281286
}
282287

283-
// Size check before downloading
284288
if u64::from(attachment.size) > MAX_SIZE {
285-
error!(
286-
filename = %attachment.filename,
287-
size = attachment.size,
288-
max = MAX_SIZE,
289-
"image attachment exceeds 10MB limit"
290-
);
289+
error!(filename = %attachment.filename, size = attachment.size, "image exceeds 10MB limit");
291290
return None;
292291
}
293292

294-
// Download using the static reusable client
295293
let response = match HTTP_CLIENT.get(url).send().await {
296294
Ok(resp) => resp,
297-
Err(e) => {
298-
error!("failed to download image {}: {}", url, e);
299-
return None;
300-
}
295+
Err(e) => { error!(url = %url, error = %e, "download failed"); return None; }
301296
};
302-
303297
if !response.status().is_success() {
304-
error!("HTTP error downloading image {}: {}", url, response.status());
298+
error!(url = %url, status = %response.status(), "HTTP error downloading image");
305299
return None;
306300
}
307-
308301
let bytes = match response.bytes().await {
309302
Ok(b) => b,
310-
Err(e) => {
311-
error!("failed to read image bytes from {}: {}", url, e);
312-
return None;
313-
}
303+
Err(e) => { error!(url = %url, error = %e, "read failed"); return None; }
314304
};
315305

316-
// Final size check after download (defense in depth)
306+
// Defense-in-depth: verify actual download size
317307
if bytes.len() as u64 > MAX_SIZE {
318-
error!(
319-
filename = %attachment.filename,
320-
size = bytes.len(),
321-
"downloaded image exceeds 10MB limit after decode"
322-
);
308+
error!(filename = %attachment.filename, size = bytes.len(), "downloaded image exceeds limit");
323309
return None;
324310
}
325311

326-
let encoded = BASE64.encode(bytes.as_ref());
312+
// Resize and compress
313+
let (output_bytes, output_mime) = match resize_and_compress(&bytes) {
314+
Ok(result) => result,
315+
Err(e) => {
316+
// Fallback: use original bytes but reject if too large for transport
317+
if bytes.len() > 1024 * 1024 {
318+
error!(filename = %attachment.filename, error = %e, size = bytes.len(), "resize failed and original too large, skipping");
319+
return None;
320+
}
321+
debug!(filename = %attachment.filename, error = %e, "resize failed, using original");
322+
(bytes.to_vec(), mime.to_string())
323+
}
324+
};
325+
326+
debug!(
327+
filename = %attachment.filename,
328+
original_size = bytes.len(),
329+
compressed_size = output_bytes.len(),
330+
"image processed"
331+
);
332+
333+
let encoded = BASE64.encode(&output_bytes);
327334
Some(ContentBlock::Image {
328-
media_type: mime.to_string(),
335+
media_type: output_mime,
329336
data: encoded,
330337
})
331338
}
332339

340+
/// Resize image so longest side ≤ IMAGE_MAX_DIMENSION_PX, then encode as JPEG.
341+
/// Returns (compressed_bytes, mime_type). GIFs are passed through unchanged
342+
/// to preserve animation.
343+
fn resize_and_compress(raw: &[u8]) -> Result<(Vec<u8>, String), image::ImageError> {
344+
let reader = ImageReader::new(Cursor::new(raw))
345+
.with_guessed_format()?;
346+
347+
let format = reader.format();
348+
349+
// Pass through GIFs unchanged to preserve animation
350+
if format == Some(image::ImageFormat::Gif) {
351+
return Ok((raw.to_vec(), "image/gif".to_string()));
352+
}
353+
354+
let img = reader.decode()?;
355+
let (w, h) = (img.width(), img.height());
356+
357+
// Resize preserving aspect ratio: scale so longest side = 1200px
358+
let img = if w > IMAGE_MAX_DIMENSION_PX || h > IMAGE_MAX_DIMENSION_PX {
359+
let max_side = std::cmp::max(w, h);
360+
let ratio = f64::from(IMAGE_MAX_DIMENSION_PX) / f64::from(max_side);
361+
let new_w = (f64::from(w) * ratio) as u32;
362+
let new_h = (f64::from(h) * ratio) as u32;
363+
img.resize(new_w, new_h, image::imageops::FilterType::Lanczos3)
364+
} else {
365+
img
366+
};
367+
368+
// Encode as JPEG
369+
let mut buf = Cursor::new(Vec::new());
370+
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, IMAGE_JPEG_QUALITY);
371+
img.write_with_encoder(encoder)?;
372+
373+
Ok((buf.into_inner(), "image/jpeg".to_string()))
374+
}
375+
333376
/// Replace the content of message `msg_id` in channel `ch` with `content`.
///
/// Returns whatever `ChannelId::edit_message` returns for the request.
async fn edit(ctx: &Context, ch: ChannelId, msg_id: MessageId, content: &str) -> serenity::Result<Message> {
    let builder = serenity::builder::EditMessage::new().content(content);
    ch.edit_message(&ctx.http, msg_id, builder).await
}
@@ -542,3 +585,87 @@ async fn get_or_create_thread(ctx: &Context, msg: &Message, prompt: &str) -> any
542585
Ok(thread.id.get())
543586
}
544587

588+
589+
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode a default-initialised RGB image of the given size as PNG bytes.
    fn make_png(width: u32, height: u32) -> Vec<u8> {
        let mut sink = Cursor::new(Vec::new());
        image::RgbImage::new(width, height)
            .write_to(&mut sink, image::ImageFormat::Png)
            .unwrap();
        sink.into_inner()
    }

    /// Decode `bytes` and report the image's `(width, height)`.
    fn decoded_size(bytes: &[u8]) -> (u32, u32) {
        let decoded = image::load_from_memory(bytes).unwrap();
        (decoded.width(), decoded.height())
    }

    #[test]
    fn large_image_resized_to_max_dimension() {
        let source = make_png(3000, 2000);
        let (jpeg, mime) = resize_and_compress(&source).unwrap();

        assert_eq!(mime, "image/jpeg");
        let (out_w, out_h) = decoded_size(&jpeg);
        assert!(out_w <= IMAGE_MAX_DIMENSION_PX);
        assert!(out_h <= IMAGE_MAX_DIMENSION_PX);
    }

    #[test]
    fn small_image_keeps_original_dimensions() {
        let source = make_png(800, 600);
        let (jpeg, mime) = resize_and_compress(&source).unwrap();

        assert_eq!(mime, "image/jpeg");
        let (out_w, out_h) = decoded_size(&jpeg);
        assert_eq!(out_w, 800);
        assert_eq!(out_h, 600);
    }

    #[test]
    fn landscape_image_respects_aspect_ratio() {
        let source = make_png(4000, 2000);
        let (jpeg, _) = resize_and_compress(&source).unwrap();

        let (out_w, out_h) = decoded_size(&jpeg);
        assert_eq!(out_w, 1200);
        assert_eq!(out_h, 600);
    }

    #[test]
    fn portrait_image_respects_aspect_ratio() {
        let source = make_png(2000, 4000);
        let (jpeg, _) = resize_and_compress(&source).unwrap();

        let (out_w, out_h) = decoded_size(&jpeg);
        assert_eq!(out_w, 600);
        assert_eq!(out_h, 1200);
    }

    #[test]
    fn compressed_output_is_smaller_than_original() {
        let source = make_png(3000, 2000);
        let (jpeg, _) = resize_and_compress(&source).unwrap();

        assert!(jpeg.len() < source.len(), "compressed {} should be < original {}", jpeg.len(), source.len());
    }

    #[test]
    fn gif_passes_through_unchanged() {
        // Smallest GIF89a used as a fixture: a single 1x1 frame.
        let fixture: Vec<u8> = vec![
            0x47, 0x49, 0x46, 0x38, 0x39, 0x61, // "GIF89a" signature
            0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, // logical screen descriptor
            0x2C, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, // image descriptor
            0x02, 0x02, 0x44, 0x01, 0x00, // image data
            0x3B, // trailer
        ];
        let (passthrough, mime) = resize_and_compress(&fixture).unwrap();

        assert_eq!(mime, "image/gif");
        assert_eq!(passthrough, fixture);
    }

    #[test]
    fn invalid_data_returns_error() {
        let junk = vec![0x00, 0x01, 0x02, 0x03];
        let outcome = resize_and_compress(&junk);
        assert!(outcome.is_err());
    }
}

0 commit comments

Comments
 (0)