Skip to content

Commit 16b064d

Browse files
committed
deduplication and more accurate token estimation for images
1 parent b3b4731 commit 16b064d

5 files changed

Lines changed: 37 additions & 25 deletions

File tree

bsky_agent/agents/pattern_config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ chunk_size = 30
115115
summarization_model = "claude-3-5-haiku-20241022"
116116

117117
[context]
118-
max_messages = 250
118+
max_messages = 200
119119

120120

121121
# Bluesky configuration for the constellation

crates/pattern_core/src/context/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ impl Default for ContextConfig {
156156
base_instructions: DEFAULT_BASE_INSTRUCTIONS.to_string(),
157157
memory_char_limit: DEFAULT_CORE_MEMORY_CHAR_LIMIT,
158158
max_context_messages: DEFAULT_MAX_CONTEXT_MESSAGES,
159-
max_context_tokens: Some(128000),
159+
max_context_tokens: Some(200_000),
160160
enable_thinking: true,
161161
tool_usage_rules: Vec::new(),
162162
tool_workflow_rules: Vec::new(),
@@ -187,7 +187,7 @@ impl Default for ModelAdjustments {
187187
Self {
188188
native_thinking: false,
189189
use_xml_tags: true,
190-
max_context_tokens: Some(128_000),
190+
max_context_tokens: Some(200_000),
191191
token_multiplier: 1.0, // Rough estimate: 1 token ≈ 0.75 words
192192
}
193193
}

crates/pattern_core/src/data_source/bluesky.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,32 +78,32 @@ pub struct ThreadContext {
7878
impl ThreadContext {
7979
/// Collect all image URLs from the thread context
8080
pub fn collect_all_image_urls(&self, main_post: &BlueskyPost) -> Vec<String> {
81-
let mut urls = Vec::new();
81+
let mut unique_urls = std::collections::HashSet::new();
8282

8383
// Collect from parent chain
8484
for (parent, siblings) in &self.parent_chain {
85-
urls.extend(parent.collect_image_urls());
85+
unique_urls.extend(parent.collect_image_urls());
8686
for sibling in siblings {
87-
urls.extend(sibling.collect_image_urls());
87+
unique_urls.extend(sibling.collect_image_urls());
8888
}
8989
}
9090

9191
// Collect from root if different
9292
if let Some(root) = &self.root {
93-
urls.extend(root.collect_image_urls());
93+
unique_urls.extend(root.collect_image_urls());
9494
}
9595

9696
// Collect from main post
97-
urls.extend(main_post.collect_image_urls());
97+
unique_urls.extend(main_post.collect_image_urls());
9898

9999
// Collect from replies
100100
for replies in self.replies_map.values() {
101101
for reply in replies {
102-
urls.extend(reply.collect_image_urls());
102+
unique_urls.extend(reply.collect_image_urls());
103103
}
104104
}
105105

106-
urls
106+
unique_urls.into_iter().collect()
107107
}
108108

109109
/// Append full thread tree to buffer
@@ -1018,8 +1018,8 @@ impl BlueskyPost {
10181018
if cid.starts_with("http") {
10191019
cid
10201020
} else {
1021-
// Use the parent post's DID (not the quoted post's DID)
1022-
format!("https://cdn.bsky.app/img/feed_fullsize/plain/{}/{}@jpeg", did, cid)
1021+
// Use the parent post's DID (not the quoted post's DID) - thumbnail for LLM
1022+
format!("https://cdn.bsky.app/img/feed_thumbnail/plain/{}/{}@jpeg", did, cid)
10231023
}
10241024
} else {
10251025
// Use the parent post's DID for the fallback case too
@@ -4015,16 +4015,16 @@ fn convert_blob_to_url(blob_ref: &str, did: &str) -> String {
40154015
if cid.starts_with("http") {
40164016
cid
40174017
} else {
4018-
// Convert CID to CDN URL (full size)
4018+
// Convert CID to CDN URL (thumbnail - better for LLM processing)
40194019
format!(
4020-
"https://cdn.bsky.app/img/feed_fullsize/plain/{}/{}@jpeg",
4020+
"https://cdn.bsky.app/img/feed_thumbnail/plain/{}/{}@jpeg",
40214021
did, cid
40224022
)
40234023
}
40244024
} else {
4025-
// Fallback: treat as plain CID
4025+
// Fallback: treat as plain CID (thumbnail)
40264026
format!(
4027-
"https://cdn.bsky.app/img/feed_fullsize/plain/{}/{}@jpeg",
4027+
"https://cdn.bsky.app/img/feed_thumbnail/plain/{}/{}@jpeg",
40284028
did, blob_ref
40294029
)
40304030
}

crates/pattern_core/src/message.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2223,8 +2223,20 @@ impl Message {
22232223
/// Rough estimation of token count for this message
22242224
///
22252225
/// Uses the approximation of ~4 characters per token
2226+
/// Images are estimated at 1600 tokens each
22262227
pub fn estimate_tokens(&self) -> usize {
2227-
self.display_content().len() / 4
2228+
let text_tokens = self.display_content().len() / 4;
2229+
2230+
// Count images in the message
2231+
let image_count = match &self.content {
2232+
MessageContent::Parts(parts) => parts
2233+
.iter()
2234+
.filter(|part| matches!(part, ContentPart::Image { .. }))
2235+
.count(),
2236+
_ => 0,
2237+
};
2238+
2239+
text_tokens + (image_count * 1600)
22282240
}
22292241
}
22302242

crates/pattern_discord/src/bot.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,7 +1135,7 @@ impl DiscordBot {
11351135

11361136
// Process attachments if any
11371137
let mut attachment_content = String::new();
1138-
let mut image_markers = Vec::new();
1138+
let mut unique_image_urls = std::collections::HashSet::new();
11391139
if !msg.attachments.is_empty() {
11401140
for attachment in &msg.attachments {
11411141
// Check if it's an image file
@@ -1150,8 +1150,8 @@ impl DiscordBot {
11501150
.map_or(false, |ct| ct.starts_with("image/"));
11511151

11521152
if is_image {
1153-
// Add image marker for multimodal processing
1154-
image_markers.push(format!("[IMAGE: {}]", attachment.url));
1153+
// Add unique image URL for multimodal processing
1154+
unique_image_urls.insert(attachment.url.clone());
11551155
attachment_content.push_str(&format!(
11561156
"\n\n[Image attachment: {} ({} bytes)]",
11571157
attachment.filename, attachment.size
@@ -1197,13 +1197,13 @@ impl DiscordBot {
11971197
}
11981198
}
11991199

1200-
// Take only last 4 images to avoid token bloat
1201-
let selected_images: Vec<_> =
1202-
image_markers.iter().rev().take(4).rev().cloned().collect();
1200+
// Convert to vec and take only last 4 images to avoid token bloat
1201+
let all_images: Vec<String> = unique_image_urls.into_iter().collect();
1202+
let selected_images: Vec<_> = all_images.iter().rev().take(4).rev().cloned().collect();
12031203

12041204
// Append image markers to attachment content
1205-
for image_marker in &selected_images {
1206-
attachment_content.push_str(&format!("\n{}", image_marker));
1205+
for image_url in &selected_images {
1206+
attachment_content.push_str(&format!("\n[IMAGE: {}]", image_url));
12071207
}
12081208

12091209
// Create framing prompt that makes responding optional

0 commit comments

Comments
 (0)