@@ -78,32 +78,32 @@ pub struct ThreadContext {
7878impl ThreadContext {
7979 /// Collect all image URLs from the thread context
8080 pub fn collect_all_image_urls ( & self , main_post : & BlueskyPost ) -> Vec < String > {
81- let mut urls = Vec :: new ( ) ;
81+ let mut unique_urls = std :: collections :: HashSet :: new ( ) ;
8282
8383 // Collect from parent chain
8484 for ( parent, siblings) in & self . parent_chain {
85- urls . extend ( parent. collect_image_urls ( ) ) ;
85+ unique_urls . extend ( parent. collect_image_urls ( ) ) ;
8686 for sibling in siblings {
87- urls . extend ( sibling. collect_image_urls ( ) ) ;
87+ unique_urls . extend ( sibling. collect_image_urls ( ) ) ;
8888 }
8989 }
9090
9191 // Collect from root if different
9292 if let Some ( root) = & self . root {
93- urls . extend ( root. collect_image_urls ( ) ) ;
93+ unique_urls . extend ( root. collect_image_urls ( ) ) ;
9494 }
9595
9696 // Collect from main post
97- urls . extend ( main_post. collect_image_urls ( ) ) ;
97+ unique_urls . extend ( main_post. collect_image_urls ( ) ) ;
9898
9999 // Collect from replies
100100 for replies in self . replies_map . values ( ) {
101101 for reply in replies {
102- urls . extend ( reply. collect_image_urls ( ) ) ;
102+ unique_urls . extend ( reply. collect_image_urls ( ) ) ;
103103 }
104104 }
105105
106- urls
106+ unique_urls . into_iter ( ) . collect ( )
107107 }
108108
109109 /// Append full thread tree to buffer
@@ -1018,8 +1018,8 @@ impl BlueskyPost {
10181018 if cid. starts_with ( "http" ) {
10191019 cid
10201020 } else {
1021- // Use the parent post's DID (not the quoted post's DID)
1022- format ! ( "https://cdn.bsky.app/img/feed_fullsize /plain/{}/{}@jpeg" , did, cid)
1021+ // Use the parent post's DID (not the quoted post's DID) - thumbnail for LLM
1022+ format ! ( "https://cdn.bsky.app/img/feed_thumbnail /plain/{}/{}@jpeg" , did, cid)
10231023 }
10241024 } else {
10251025 // Use the parent post's DID for the fallback case too
@@ -4015,16 +4015,16 @@ fn convert_blob_to_url(blob_ref: &str, did: &str) -> String {
40154015 if cid. starts_with ( "http" ) {
40164016 cid
40174017 } else {
4018- // Convert CID to CDN URL (full size )
4018+ // Convert CID to CDN URL (thumbnail - better for LLM processing )
40194019 format ! (
4020- "https://cdn.bsky.app/img/feed_fullsize /plain/{}/{}@jpeg" ,
4020+ "https://cdn.bsky.app/img/feed_thumbnail /plain/{}/{}@jpeg" ,
40214021 did, cid
40224022 )
40234023 }
40244024 } else {
4025- // Fallback: treat as plain CID
4025+ // Fallback: treat as plain CID (thumbnail)
40264026 format ! (
4027- "https://cdn.bsky.app/img/feed_fullsize /plain/{}/{}@jpeg" ,
4027+ "https://cdn.bsky.app/img/feed_thumbnail /plain/{}/{}@jpeg" ,
40284028 did, blob_ref
40294029 )
40304030 }
0 commit comments