Skip to content

Commit c79f80a

Browse files
committed
forgot about some silly foibles of bluesky image links
1 parent 53b80b2 commit c79f80a

1 file changed

Lines changed: 110 additions & 6 deletions

File tree

crates/pattern_core/src/data_source/bluesky.rs

Lines changed: 110 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,10 @@ impl BlueskyPost {
626626
pub fn collect_image_urls(&self) -> Vec<String> {
627627
match &self.embed {
628628
Some(EmbedInfo::Images { urls, .. }) => urls.clone(),
629+
Some(EmbedInfo::External {
630+
thumb: Some(thumb_url),
631+
..
632+
}) => vec![thumb_url.clone()],
629633
Some(EmbedInfo::QuoteWithMedia { media, .. }) => {
630634
if let EmbedInfo::Images { urls, .. } = media.as_ref() {
631635
urls.clone()
@@ -687,6 +691,20 @@ impl BlueskyPost {
687691
uri: external.external.uri.clone(),
688692
title: external.external.title.clone(),
689693
description: external.external.description.clone(),
694+
thumb: external.external.thumb.as_ref().and_then(|thumb| {
695+
if let Ok(thumb_json) = serde_json::to_string(thumb) {
696+
if let Some(cid) = extract_cid_from_blob(&thumb_json) {
697+
Some(format!(
698+
"https://cdn.bsky.app/img/feed_thumbnail/plain/{}/{}@jpeg",
699+
did, cid
700+
))
701+
} else {
702+
None
703+
}
704+
} else {
705+
None
706+
}
707+
}),
690708
})
691709
}
692710
Union::Refs(RecordEmbedRefs::AppBskyEmbedImagesMain(images)) => {
@@ -746,6 +764,7 @@ impl BlueskyPost {
746764
uri: external.external.uri.clone(),
747765
title: external.external.title.clone(),
748766
description: external.external.description.clone(),
767+
thumb: None, // Not available from record
749768
})
750769
}
751770
_ => return None,
@@ -862,7 +881,7 @@ impl BlueskyPost {
862881

863882
// Convert PostView embed to enriched EmbedInfo (has more data than record embeds)
864883
if let Some(embed) = &post_view.embed {
865-
if let Some(enriched_embed) = Self::convert_postview_embed(embed) {
884+
if let Some(enriched_embed) = Self::convert_postview_embed(embed, &post.did) {
866885
post.embed = Some(enriched_embed);
867886
post.hydration.embed_enriched = true;
868887
}
@@ -871,7 +890,7 @@ impl BlueskyPost {
871890
Some(post)
872891
}
873892
/// Convert PostView embed to EmbedInfo without network calls
874-
fn convert_postview_embed(embed: &Union<PostViewEmbedRefs>) -> Option<EmbedInfo> {
893+
fn convert_postview_embed(embed: &Union<PostViewEmbedRefs>, did: &str) -> Option<EmbedInfo> {
875894
match embed {
876895
Union::Refs(PostViewEmbedRefs::AppBskyEmbedImagesView(images_view)) => {
877896
// PostView has image thumbnails but not full URLs
@@ -885,7 +904,26 @@ impl BlueskyPost {
885904
urls: images_view
886905
.images
887906
.iter()
888-
.map(|img| img.thumb.clone()) // Use thumb URLs for now
907+
.map(|img| {
908+
// Try fullsize first, fallback to thumb with appropriate CDN format
909+
if img.fullsize.is_empty() {
910+
// Fallback to thumbnail - extract CID from blob JSON and use thumbnail CDN format
911+
if img.thumb.starts_with("http") {
912+
img.thumb.clone()
913+
} else if let Some(cid) = extract_cid_from_blob(&img.thumb) {
914+
if cid.starts_with("http") {
915+
cid
916+
} else {
917+
format!("https://cdn.bsky.app/img/feed_thumbnail/plain/{}/{}@jpeg", did, cid)
918+
}
919+
} else {
920+
// Final fallback: treat as plain CID
921+
format!("https://cdn.bsky.app/img/feed_thumbnail/plain/{}/{}@jpeg", did, img.thumb)
922+
}
923+
} else {
924+
convert_blob_to_url(&img.fullsize, did)
925+
}
926+
})
889927
.collect(),
890928
})
891929
}
@@ -894,6 +932,11 @@ impl BlueskyPost {
894932
uri: external_view.external.uri.clone(),
895933
title: external_view.external.title.clone(),
896934
description: external_view.external.description.clone(),
935+
thumb: external_view
936+
.external
937+
.thumb
938+
.as_ref()
939+
.map(|thumb| convert_blob_to_url(thumb, did)),
897940
})
898941
}
899942
Union::Refs(PostViewEmbedRefs::AppBskyEmbedRecordView(record_view)) => {
@@ -968,14 +1011,29 @@ impl BlueskyPost {
9681011
Box::new(EmbedInfo::Images {
9691012
count: images.images.len(),
9701013
alt_texts: images.images.iter().map(|img| img.alt.clone()).collect(),
971-
urls: images.images.iter().map(|img| img.thumb.clone()).collect(),
1014+
urls: images.images.iter().map(|img| {
1015+
if img.fullsize.starts_with("http") {
1016+
img.fullsize.clone()
1017+
} else if let Some(cid) = extract_cid_from_blob(&img.fullsize) {
1018+
if cid.starts_with("http") {
1019+
cid
1020+
} else {
1021+
// Use the parent post's DID (not the quoted post's DID)
1022+
format!("https://cdn.bsky.app/img/feed_fullsize/plain/{}/{}@jpeg", did, cid)
1023+
}
1024+
} else {
1025+
// Use the parent post's DID for the fallback case too
1026+
format!("https://cdn.bsky.app/img/feed_thumbnail/plain/{}/{}@jpeg", did, img.thumb)
1027+
}
1028+
}).collect(),
9721029
})
9731030
}
9741031
Union::Refs(atrium_api::app::bsky::embed::record_with_media::ViewMediaRefs::AppBskyEmbedExternalView(external)) => {
9751032
Box::new(EmbedInfo::External {
9761033
uri: external.external.uri.clone(),
9771034
title: external.external.title.clone(),
9781035
description: external.external.description.clone(),
1036+
thumb: external.external.thumb.as_ref().map(|thumb| convert_blob_to_url(thumb, did)),
9791037
})
9801038
}
9811039
_ => return None,
@@ -3832,6 +3890,7 @@ pub enum EmbedInfo {
38323890
uri: String,
38333891
title: String,
38343892
description: String,
3893+
thumb: Option<String>,
38353894
},
38363895
Quote {
38373896
uri: String,
@@ -3859,9 +3918,9 @@ impl EmbedInfo {
38593918
..
38603919
} => {
38613920
buf.push_str(&format!("{} [📸 {} image(s)]\n", indent, count));
3862-
// Show URLs for each image
3921+
// Show URLs for each image (using format that won't trigger multimodal parsing)
38633922
for (i, url) in urls.iter().enumerate() {
3864-
buf.push_str(&format!("{} [IMAGE: {}]\n", indent, url));
3923+
buf.push_str(&format!("{} (img: {})\n", indent, url));
38653924
// Show alt text if available
38663925
if let Some(alt) = alt_texts.get(i) {
38673926
if !alt.is_empty() {
@@ -3874,8 +3933,12 @@ impl EmbedInfo {
38743933
uri,
38753934
title,
38763935
description,
3936+
thumb,
38773937
} => {
38783938
buf.push_str(&format!("{} [🔗 Link Card]\n", indent));
3939+
if let Some(thumb_url) = thumb {
3940+
buf.push_str(&format!("{} (thumb: {})\n", indent, thumb_url));
3941+
}
38793942
if !title.is_empty() {
38803943
buf.push_str(&format!("{} {}\n", indent, title));
38813944
}
@@ -3926,6 +3989,47 @@ fn create_basic_memory_content(handle: &str, did: &str) -> String {
39263989
)
39273990
}
39283991

3992+
/// Extract CID from a blob JSON string like {"$type":"blob","ref":{"$link":"bafkreihirce..."},"mimeType":"image/jpeg","size":125235}
3993+
fn extract_cid_from_blob(blob_json: &str) -> Option<String> {
3994+
use serde_json::Value;
3995+
3996+
if let Ok(blob) = serde_json::from_str::<Value>(blob_json) {
3997+
blob.get("ref")
3998+
.and_then(|r| r.get("$link"))
3999+
.and_then(|link| link.as_str())
4000+
.map(|s| s.to_string())
4001+
} else {
4002+
// If it's not JSON, assume it's already a CID or URL
4003+
Some(blob_json.to_string())
4004+
}
4005+
}
4006+
4007+
/// Convert a Bluesky blob CID to a CDN image URL
4008+
/// If the image is already an HTTP URL, returns it as-is
4009+
/// If it's a blob reference JSON, extracts the CID and converts to cdn.bsky.app URL using fullsize format
4010+
fn convert_blob_to_url(blob_ref: &str, did: &str) -> String {
4011+
if blob_ref.starts_with("http") {
4012+
// Already an HTTP URL
4013+
blob_ref.to_string()
4014+
} else if let Some(cid) = extract_cid_from_blob(blob_ref) {
4015+
if cid.starts_with("http") {
4016+
cid
4017+
} else {
4018+
// Convert CID to CDN URL (full size)
4019+
format!(
4020+
"https://cdn.bsky.app/img/feed_fullsize/plain/{}/{}@jpeg",
4021+
did, cid
4022+
)
4023+
}
4024+
} else {
4025+
// Fallback: treat as plain CID
4026+
format!(
4027+
"https://cdn.bsky.app/img/feed_fullsize/plain/{}/{}@jpeg",
4028+
did, blob_ref
4029+
)
4030+
}
4031+
}
4032+
39294033
fn label_convert(l: &Union<RecordLabelsRefs>) -> Vec<String> {
39304034
match l {
39314035
atrium_api::types::Union::Refs(

0 commit comments

Comments
 (0)