From 0c46ac9172f7e3fbea4d1fd1886b108a8980eae2 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 15 Jan 2026 18:03:59 -0500 Subject: [PATCH 01/42] Add immediate priority for queue_backfill dids command --- rsky-wintermute/src/bin/queue_backfill.rs | 22 +++++++++++++++++----- rsky-wintermute/src/storage.rs | 14 ++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/rsky-wintermute/src/bin/queue_backfill.rs b/rsky-wintermute/src/bin/queue_backfill.rs index 2757993a..e87a050b 100644 --- a/rsky-wintermute/src/bin/queue_backfill.rs +++ b/rsky-wintermute/src/bin/queue_backfill.rs @@ -51,6 +51,9 @@ enum Command { /// Queue with normal priority instead of high priority #[arg(long, default_value = "false")] normal_priority: bool, + /// Queue with immediate priority (processed first, before all other items) + #[arg(long, default_value = "false")] + immediate: bool, }, /// Show current queue status Status, @@ -120,7 +123,8 @@ fn main() -> Result<()> { Command::Dids { dids, normal_priority, - } => queue_dids(&storage, &dids, !normal_priority), // DIDs use priority by default + immediate, + } => queue_dids(&storage, &dids, !normal_priority, immediate), Command::Status => show_status(&storage), Command::Peek { count } => peek_queue(&storage, count), Command::Search { did, limit } => search_queue(&storage, &did, limit), @@ -276,11 +280,17 @@ async fn queue_from_pds(storage: &Storage, host: &str, priority: bool) -> Result Ok(()) } -fn queue_dids(storage: &Storage, dids: &[String], priority: bool) -> Result<()> { +fn queue_dids(storage: &Storage, dids: &[String], priority: bool, immediate: bool) -> Result<()> { let mut queued = 0; let mut skipped = 0; - let priority_str = if priority { "HIGH PRIORITY" } else { "normal" }; + let priority_str = if immediate { + "IMMEDIATE" + } else if priority { + "HIGH PRIORITY" + } else { + "normal" + }; println!("Queuing DIDs with {priority_str} priority"); for did_arg in dids { @@ -301,10 +311,12 @@ fn 
queue_dids(storage: &Storage, dids: &[String], priority: bool) -> Result<()> let job = BackfillJob { did: did.to_string(), retry_count: 0, - priority, + priority: priority || immediate, }; - if priority { + if immediate { + storage.enqueue_backfill_immediate(&job)?; + } else if priority { storage.enqueue_backfill_priority(&job)?; } else { storage.enqueue_backfill(&job)?; diff --git a/rsky-wintermute/src/storage.rs b/rsky-wintermute/src/storage.rs index a7555b52..d5b89e04 100644 --- a/rsky-wintermute/src/storage.rs +++ b/rsky-wintermute/src/storage.rs @@ -205,6 +205,20 @@ impl Storage { Ok(()) } + /// Enqueue a backfill job with IMMEDIATE priority (timestamp 0) + /// These items are processed FIRST, before all other priority items + pub fn enqueue_backfill_immediate(&self, job: &BackfillJob) -> Result<(), WintermuteError> { + // Key format: "0:0:{did}" - timestamp 0 ensures it sorts first + let key = format!("0:0:{}", job.did); + let mut value = Vec::new(); + ciborium::into_writer(job, &mut value) + .map_err(|e| WintermuteError::Serialization(format!("failed to serialize job: {e}")))?; + self.repo_backfill + .insert(key.as_bytes(), value.as_slice())?; + crate::metrics::INGESTER_REPO_BACKFILL_LENGTH.inc(); + Ok(()) + } + pub fn dequeue_backfill(&self) -> Result, BackfillJob)>, WintermuteError> { let mut iter = self.repo_backfill.iter(); let Some(entry) = iter.next() else { From 7cf4a02da4dac539d9245f4d7667a20bcd598b1e Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Fri, 16 Jan 2026 00:01:49 -0500 Subject: [PATCH 02/42] Fix profile_agg not updating during bulk inserts The bulk insert paths for posts and follows were not updating profile_agg counters (postsCount, followsCount, followersCount). This caused profiles to show 0 counts despite having posts/follows. 
Added profile_agg updates after bulk inserts: - copy_insert_posts: Update postsCount for creators - copy_insert_follows: Update followsCount for creators, followersCount for subjects --- rsky-wintermute/src/indexer/bulk.rs | 50 ++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/rsky-wintermute/src/indexer/bulk.rs b/rsky-wintermute/src/indexer/bulk.rs index 193f8044..6c40f67b 100644 --- a/rsky-wintermute/src/indexer/bulk.rs +++ b/rsky-wintermute/src/indexer/bulk.rs @@ -269,15 +269,30 @@ pub async fn copy_insert_posts( .await?; let insert_ms = insert_start.elapsed().as_millis(); + // Phase 4: Update profile_agg postsCount for affected creators + let agg_start = Instant::now(); + client + .execute( + "INSERT INTO profile_agg (did, \"postsCount\") + SELECT creator, COUNT(*) FROM post + WHERE creator IN (SELECT DISTINCT creator FROM _bulk_post) + GROUP BY creator + ON CONFLICT (did) DO UPDATE SET \"postsCount\" = EXCLUDED.\"postsCount\"", + &[], + ) + .await?; + let agg_ms = agg_start.elapsed().as_millis(); + // Log if total > 100ms (worth investigating) - let total_ms = setup_ms + copy_ms + insert_ms; + let total_ms = setup_ms + copy_ms + insert_ms + agg_ms; if total_ms > 100 { tracing::warn!( - "SLOW post bulk: {}ms total (setup={}ms, copy={}ms, insert={}ms) for {} rows", + "SLOW post bulk: {}ms total (setup={}ms, copy={}ms, insert={}ms, agg={}ms) for {} rows", total_ms, setup_ms, copy_ms, insert_ms, + agg_ms, count ); } @@ -519,15 +534,42 @@ pub async fn copy_insert_follows( .await?; let insert_ms = insert_start.elapsed().as_millis(); + // Phase 4: Update profile_agg followsCount and followersCount + let agg_start = Instant::now(); + // Update followsCount for creators (those who are following) + client + .execute( + "INSERT INTO profile_agg (did, \"followsCount\") + SELECT creator, COUNT(*) FROM follow + WHERE creator IN (SELECT DISTINCT creator FROM _bulk_follow) + GROUP BY creator + ON CONFLICT (did) DO UPDATE SET 
\"followsCount\" = EXCLUDED.\"followsCount\"", + &[], + ) + .await?; + // Update followersCount for subjects (those who are followed) + client + .execute( + "INSERT INTO profile_agg (did, \"followersCount\") + SELECT \"subjectDid\", COUNT(*) FROM follow + WHERE \"subjectDid\" IN (SELECT DISTINCT subject_did FROM _bulk_follow) + GROUP BY \"subjectDid\" + ON CONFLICT (did) DO UPDATE SET \"followersCount\" = EXCLUDED.\"followersCount\"", + &[], + ) + .await?; + let agg_ms = agg_start.elapsed().as_millis(); + // Log if total > 100ms (worth investigating) - let total_ms = setup_ms + copy_ms + insert_ms; + let total_ms = setup_ms + copy_ms + insert_ms + agg_ms; if total_ms > 100 { tracing::warn!( - "SLOW follow bulk: {}ms total (setup={}ms, copy={}ms, insert={}ms) for {} rows", + "SLOW follow bulk: {}ms total (setup={}ms, copy={}ms, insert={}ms, agg={}ms) for {} rows", total_ms, setup_ms, copy_ms, insert_ms, + agg_ms, count ); } From 21baae4710844ccfa9c31a4fdb4f6d55ccb848c1 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Fri, 16 Jan 2026 07:59:42 -0500 Subject: [PATCH 03/42] Add sortAt column to quote inserts The quote table was missing sortAt which is required by the getQuotes dataplane route for pagination. 
--- rsky-wintermute/src/indexer/mod.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 8c384584..23312f86 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2781,13 +2781,19 @@ impl IndexerManager { if let (Some(embed_uri), Some(embed_cid)) = (embed_uri, embed_cid) { // Only process if it's a post being quoted if embed_uri.contains("/app.bsky.feed.post/") { + // Calculate sortAt (earlier of indexed_at and created_at) + let sort_at_quote = if indexed_at < created_at { + indexed_at + } else { + created_at + }; // Insert into quote table client .execute( - "INSERT INTO quote (uri, cid, creator, subject, \"subjectCid\", \"createdAt\", \"indexedAt\") - VALUES ($1, $2, $3, $4, $5, $6, $7) + "INSERT INTO quote (uri, cid, creator, subject, \"subjectCid\", \"createdAt\", \"indexedAt\", \"sortAt\") + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT DO NOTHING", - &[&post_uri, &post_cid, &creator, &embed_uri, &embed_cid, &created_at, &indexed_at], + &[&post_uri, &post_cid, &creator, &embed_uri, &embed_cid, &created_at, &indexed_at, &sort_at_quote], ) .await?; From fd4a3ef829dce533f26ffd80a240b3a090f3104d Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Fri, 16 Jan 2026 15:35:50 -0500 Subject: [PATCH 04/42] Add direct_index binary for bypassing queue --- rsky-wintermute/src/bin/direct_index.rs | 268 ++++++++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 rsky-wintermute/src/bin/direct_index.rs diff --git a/rsky-wintermute/src/bin/direct_index.rs b/rsky-wintermute/src/bin/direct_index.rs new file mode 100644 index 00000000..7c883af9 --- /dev/null +++ b/rsky-wintermute/src/bin/direct_index.rs @@ -0,0 +1,268 @@ +use std::io::Cursor; +use std::sync::Arc; + +use clap::Parser; +use color_eyre::Result; +use deadpool_postgres::{Config, ManagerConfig, RecyclingMethod, Runtime}; +use iroh_car::CarReader; +use 
rsky_identity::types::IdentityResolverOpts; +use rsky_identity::IdResolver; +use rsky_repo::storage::memory_blockstore::MemoryBlockstore; +use rsky_repo::readable_repo::ReadableRepo; +use rsky_syntax::aturi::AtUri; +use tokio_postgres::NoTls; + +use rsky_repo::parse::get_and_parse_record; +use rsky_wintermute::backfiller::convert_record_to_ipld; +use rsky_wintermute::indexer::IndexerManager; +use rsky_wintermute::types::{IndexJob, WriteAction}; + +#[derive(Debug, Parser)] +#[command(name = "direct_index")] +#[command(about = "Directly fetch and index a repo, bypassing queues")] +struct Args { + /// DIDs to index (comma-separated or multiple --did flags) + #[arg(long = "did", num_args = 1..)] + dids: Vec, + + /// PostgreSQL connection URL + #[arg(long, env = "DATABASE_URL")] + database_url: String, +} + +#[tokio::main] +async fn main() -> Result<()> { + color_eyre::install()?; + tracing_subscriber::fmt::init(); + + let args = Args::parse(); + + // Parse all DIDs from args (supporting comma-separated) + let dids: Vec = args + .dids + .iter() + .flat_map(|d| d.split(',').map(|s| s.trim().to_string())) + .filter(|d| !d.is_empty() && d.starts_with("did:")) + .collect(); + + if dids.is_empty() { + eprintln!("No valid DIDs provided"); + return Ok(()); + } + + println!("Will index {} DIDs directly to PostgreSQL", dids.len()); + + // Setup database pool + let mut cfg = Config::new(); + cfg.url = Some(args.database_url.clone()); + cfg.manager = Some(ManagerConfig { + recycling_method: RecyclingMethod::Fast, + }); + + let pool = cfg.create_pool(Some(Runtime::Tokio1), NoTls)?; + let http_client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(120)) + .build()?; + + for did in &dids { + println!("\n=== Processing {} ===", did); + match process_did(&pool, &http_client, did).await { + Ok(count) => println!("Successfully indexed {} records for {}", count, did), + Err(e) => eprintln!("Failed to index {}: {}", did, e), + } + } + + println!("\nDone!"); + Ok(()) 
+} + +async fn process_did( + pool: &deadpool_postgres::Pool, + http_client: &reqwest::Client, + did: &str, +) -> Result { + // Resolve DID to get PDS endpoint + let resolver_opts = IdentityResolverOpts { + timeout: None, + plc_url: None, + did_cache: None, + backup_nameservers: None, + }; + let mut resolver = IdResolver::new(resolver_opts); + let doc = resolver + .did + .resolve(did.to_string(), None) + .await + .map_err(|e| color_eyre::eyre::eyre!("DID resolution error: {}", e))? + .ok_or_else(|| color_eyre::eyre::eyre!("DID resolution failed"))?; + + let mut pds_endpoint = None; + if let Some(services) = &doc.service { + for service in services { + if service.r#type == "AtprotoPersonalDataServer" || service.id == "#atproto_pds" { + pds_endpoint = Some(service.service_endpoint.clone()); + break; + } + } + } + + let pds_endpoint = + pds_endpoint.ok_or_else(|| color_eyre::eyre::eyre!("No PDS endpoint found"))?; + + println!(" PDS: {}", pds_endpoint); + + // Fetch CAR file + let repo_url = format!("{pds_endpoint}/xrpc/com.atproto.sync.getRepo?did={did}"); + println!(" Fetching CAR..."); + let response = http_client.get(&repo_url).send().await?; + + if !response.status().is_success() { + return Err(color_eyre::eyre::eyre!("HTTP error: {}", response.status())); + } + + let car_bytes = response.bytes().await?; + println!(" CAR size: {} bytes", car_bytes.len()); + + // Parse CAR file + let mut reader = CarReader::new(Cursor::new(car_bytes.to_vec())) + .await + .map_err(|e| color_eyre::eyre::eyre!("Failed to parse CAR file: {}", e))?; + let root = *reader + .header() + .roots() + .first() + .ok_or_else(|| color_eyre::eyre::eyre!("No root CID"))?; + + let mut blocks = rsky_repo::block_map::BlockMap::new(); + while let Some((cid, data)) = reader + .next_block() + .await + .map_err(|e| color_eyre::eyre::eyre!("Failed to read block: {}", e))? 
+ { + blocks.set(cid, data.clone()); + } + + let blockstore = MemoryBlockstore::new(Some(blocks)) + .await + .map_err(|e| color_eyre::eyre::eyre!("Failed to create blockstore: {}", e))?; + let storage_arc = Arc::new(tokio::sync::RwLock::new(blockstore)); + + let mut repo = ReadableRepo::load(storage_arc, root) + .await + .map_err(|e| color_eyre::eyre::eyre!("Failed to load repo: {}", e))?; + + if repo.did() != did { + return Err(color_eyre::eyre::eyre!( + "DID mismatch: expected {}, got {}", + did, + repo.did() + )); + } + + // Get all records + let leaves = repo + .data + .list(None, None, None) + .await + .map_err(|e| color_eyre::eyre::eyre!("Failed to list records: {}", e))?; + println!(" Found {} records", leaves.len()); + + let blocks_result = { + let storage_guard = repo.storage.read().await; + storage_guard + .get_blocks(leaves.iter().map(|e| e.value).collect()) + .await + .map_err(|e| color_eyre::eyre::eyre!("Failed to get blocks: {}", e))? + }; + + let rev = repo.commit.rev.clone(); + let now = chrono::Utc::now() + .format("%Y-%m-%dT%H:%M:%S%.3fZ") + .to_string(); + + let mut indexed_count = 0; + let mut skipped_count = 0; + + for entry in &leaves { + let uri_string = format!("at://{did}/{}", entry.key); + let Ok(uri) = AtUri::new(uri_string, None) else { + skipped_count += 1; + continue; + }; + + let collection = uri.get_collection(); + let rkey = uri.get_rkey(); + + // Filter to bsky/chat records + if !collection.starts_with("app.bsky.") && !collection.starts_with("chat.bsky.") { + skipped_count += 1; + continue; + } + + if let Ok(parsed) = get_and_parse_record(&blocks_result.blocks, entry.value) { + let record_json_raw = serde_json::to_value(&parsed.record)?; + let record_json = convert_record_to_ipld(&record_json_raw); + + let uri_string = format!("at://{did}/{collection}/{rkey}"); + let uri = AtUri::new(uri_string.clone(), None) + .map_err(|e| color_eyre::eyre::eyre!("Invalid URI {}: {}", uri_string, e))?; + let cid = entry.value.to_string(); + + let 
job = IndexJob { + uri: uri.to_string(), + cid, + action: WriteAction::Create, + record: Some(record_json), + indexed_at: now.clone(), + rev: rev.clone(), + }; + + // Index directly to PostgreSQL + if let Err(e) = IndexerManager::process_job(pool, &job).await { + eprintln!(" Warning: failed to index {}: {}", job.uri, e); + } else { + indexed_count += 1; + } + } else { + skipped_count += 1; + } + + if indexed_count > 0 && indexed_count % 100 == 0 { + print!("\r Indexed {} records...", indexed_count); + } + } + + println!("\r Indexed: {}, Skipped: {}", indexed_count, skipped_count); + + // Update profile_agg + println!(" Updating profile aggregates..."); + let client = pool.get().await?; + client + .execute( + "INSERT INTO profile_agg (did, \"postsCount\") + SELECT $1, COUNT(*) FROM post WHERE creator = $1 + ON CONFLICT (did) DO UPDATE SET \"postsCount\" = EXCLUDED.\"postsCount\"", + &[&did], + ) + .await?; + + client + .execute( + "INSERT INTO profile_agg (did, \"followsCount\") + SELECT $1, COUNT(*) FROM follow WHERE creator = $1 + ON CONFLICT (did) DO UPDATE SET \"followsCount\" = EXCLUDED.\"followsCount\"", + &[&did], + ) + .await?; + + client + .execute( + "INSERT INTO profile_agg (did, \"followersCount\") + SELECT $1, COUNT(*) FROM follow WHERE \"subjectDid\" = $1 + ON CONFLICT (did) DO UPDATE SET \"followersCount\" = EXCLUDED.\"followersCount\"", + &[&did], + ) + .await?; + + Ok(indexed_count) +} From 02313d970dc2e1cef0909277b94bbff222dff152 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Sun, 18 Jan 2026 11:43:19 -0500 Subject: [PATCH 05/42] Add post_embed_image and post_embed_video indexing for media filter The posts_with_media filter in getAuthorFeed was returning empty results because the wintermute indexer was not populating the post_embed_image and post_embed_video tables. 
Changes: - Update handle_post_embeds() to detect and process image/video embeds - Add handle_embed_images() and handle_embed_video() for single-record indexing - Add extract_embed_data(), extract_images(), extract_video() for bulk processing - Add copy_insert_post_embed_images() and copy_insert_post_embed_videos() bulk functions using COPY protocol - Update copy_batch_insert_posts() to extract and insert embed data This fixes the media tab showing empty on user profiles. Co-Authored-By: Claude Opus 4.5 --- rsky-wintermute/src/indexer/bulk.rs | 177 ++++++++++++++++++++++++ rsky-wintermute/src/indexer/mod.rs | 205 ++++++++++++++++++++++++++-- 2 files changed, 371 insertions(+), 11 deletions(-) diff --git a/rsky-wintermute/src/indexer/bulk.rs b/rsky-wintermute/src/indexer/bulk.rs index 6c40f67b..ac521c7f 100644 --- a/rsky-wintermute/src/indexer/bulk.rs +++ b/rsky-wintermute/src/indexer/bulk.rs @@ -744,6 +744,183 @@ pub async fn copy_insert_blocks( Ok(()) } +/// Bulk insert `post_embed_image` records using `COPY` protocol. 
+pub async fn copy_insert_post_embed_images( + client: &deadpool_postgres::Client, + data: &[(String, String, String, String)], // post_uri, position, image_cid, alt +) -> Result<(), WintermuteError> { + use std::time::Instant; + + if data.is_empty() { + return Ok(()); + } + + let count = data.len(); + + // Phase 1: Table setup + let setup_start = Instant::now(); + client + .execute( + "CREATE TEMP TABLE IF NOT EXISTS _bulk_post_embed_image ( + post_uri text NOT NULL, + position text NOT NULL, + image_cid text NOT NULL, + alt text NOT NULL + )", + &[], + ) + .await?; + + client + .execute("TRUNCATE _bulk_post_embed_image", &[]) + .await?; + let setup_ms = setup_start.elapsed().as_millis(); + + // Phase 2: COPY data + let copy_start = Instant::now(); + let copy_stmt = client + .copy_in("COPY _bulk_post_embed_image (post_uri, position, image_cid, alt) FROM STDIN WITH (FORMAT text, DELIMITER E'\\t')") + .await?; + + let sink = copy_stmt; + pin_mut!(sink); + + let mut buffer = Vec::with_capacity(data.len() * 150); + for (post_uri, position, image_cid, alt) in data { + // Escape alt text for tabs/newlines + let escaped_alt: String = alt + .chars() + .map(|c| match c { + '\t' | '\n' | '\r' => ' ', + _ => c, + }) + .collect(); + writeln!(buffer, "{post_uri}\t{position}\t{image_cid}\t{escaped_alt}") + .map_err(|e| WintermuteError::Other(format!("buffer write error: {e}")))?; + } + + sink.send(bytes::Bytes::from(buffer)).await?; + sink.close().await?; + let copy_ms = copy_start.elapsed().as_millis(); + + // Phase 3: INSERT...ON CONFLICT + let insert_start = Instant::now(); + client + .execute( + "INSERT INTO post_embed_image (\"postUri\", position, \"imageCid\", alt) + SELECT post_uri, position, image_cid, alt + FROM _bulk_post_embed_image + ON CONFLICT DO NOTHING", + &[], + ) + .await?; + let insert_ms = insert_start.elapsed().as_millis(); + + // Log if total > 100ms (worth investigating) + let total_ms = setup_ms + copy_ms + insert_ms; + if total_ms > 100 { + 
tracing::warn!( + "SLOW post_embed_image bulk: {}ms total (setup={}ms, copy={}ms, insert={}ms) for {} rows", + total_ms, + setup_ms, + copy_ms, + insert_ms, + count + ); + } + + Ok(()) +} + +/// Bulk insert `post_embed_video` records using `COPY` protocol. +pub async fn copy_insert_post_embed_videos( + client: &deadpool_postgres::Client, + data: &[(String, String, Option)], // post_uri, video_cid, alt +) -> Result<(), WintermuteError> { + use std::time::Instant; + + if data.is_empty() { + return Ok(()); + } + + let count = data.len(); + + // Phase 1: Table setup + let setup_start = Instant::now(); + client + .execute( + "CREATE TEMP TABLE IF NOT EXISTS _bulk_post_embed_video ( + post_uri text NOT NULL, + video_cid text NOT NULL, + alt text + )", + &[], + ) + .await?; + + client + .execute("TRUNCATE _bulk_post_embed_video", &[]) + .await?; + let setup_ms = setup_start.elapsed().as_millis(); + + // Phase 2: COPY data (with NULL handling for alt) + let copy_start = Instant::now(); + let copy_stmt = client + .copy_in("COPY _bulk_post_embed_video (post_uri, video_cid, alt) FROM STDIN WITH (FORMAT text, DELIMITER E'\\t', NULL '\\N')") + .await?; + + let sink = copy_stmt; + pin_mut!(sink); + + let mut buffer = Vec::with_capacity(data.len() * 150); + for (post_uri, video_cid, alt) in data { + let escaped_alt = match alt { + Some(a) => a + .chars() + .map(|c| match c { + '\t' | '\n' | '\r' => ' ', + _ => c, + }) + .collect::(), + None => "\\N".to_owned(), // PostgreSQL NULL marker + }; + writeln!(buffer, "{post_uri}\t{video_cid}\t{escaped_alt}") + .map_err(|e| WintermuteError::Other(format!("buffer write error: {e}")))?; + } + + sink.send(bytes::Bytes::from(buffer)).await?; + sink.close().await?; + let copy_ms = copy_start.elapsed().as_millis(); + + // Phase 3: INSERT...ON CONFLICT + let insert_start = Instant::now(); + client + .execute( + "INSERT INTO post_embed_video (\"postUri\", \"videoCid\", alt) + SELECT post_uri, video_cid, alt + FROM _bulk_post_embed_video + ON 
CONFLICT DO NOTHING", + &[], + ) + .await?; + let insert_ms = insert_start.elapsed().as_millis(); + + // Log if total > 100ms (worth investigating) + let total_ms = setup_ms + copy_ms + insert_ms; + if total_ms > 100 { + tracing::warn!( + "SLOW post_embed_video bulk: {}ms total (setup={}ms, copy={}ms, insert={}ms) for {} rows", + total_ms, + setup_ms, + copy_ms, + insert_ms, + count + ); + } + + Ok(()) +} + #[cfg(test)] mod tests { #[test] diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 23312f86..bf2ec8c0 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2352,6 +2352,8 @@ impl IndexerManager { Vec::with_capacity(jobs.len()); let mut feed_item_data: Vec<(String, String, String, String, String, String)> = Vec::with_capacity(jobs.len()); + let mut embed_image_data: Vec<(String, String, String, String)> = Vec::new(); + let mut embed_video_data: Vec<(String, String, Option)> = Vec::new(); for pj in jobs { if let Some(record) = &pj.job.record { @@ -2381,21 +2383,112 @@ impl IndexerManager { "post".to_owned(), uri.clone(), pj.job.cid.clone(), - uri, + uri.clone(), pj.did.clone(), sort_at, )); + // Extract embed data for images and videos + if let Some(embed) = record.get("embed") { + Self::extract_embed_data( + embed, + &uri, + &mut embed_image_data, + &mut embed_video_data, + ); + } + metrics::INDEXER_POST_EVENTS_TOTAL.inc(); } } bulk::copy_insert_posts(client, &post_data).await?; bulk::copy_insert_feed_items(client, &feed_item_data).await?; + bulk::copy_insert_post_embed_images(client, &embed_image_data).await?; + bulk::copy_insert_post_embed_videos(client, &embed_video_data).await?; Ok(()) } + /// Extract embed data (images and videos) from a post's embed field + fn extract_embed_data( + embed: &serde_json::Value, + post_uri: &str, + embed_image_data: &mut Vec<(String, String, String, String)>, + embed_video_data: &mut Vec<(String, String, Option)>, + ) { + let embed_type = 
embed.get("$type").and_then(|t| t.as_str()).unwrap_or(""); + + // Handle app.bsky.embed.images + if embed_type == "app.bsky.embed.images" { + Self::extract_images(embed, post_uri, embed_image_data); + } + + // Handle app.bsky.embed.video + if embed_type == "app.bsky.embed.video" { + Self::extract_video(embed, post_uri, embed_video_data); + } + + // Handle app.bsky.embed.recordWithMedia (has nested media) + if embed_type == "app.bsky.embed.recordWithMedia" { + if let Some(media) = embed.get("media") { + let media_type = media.get("$type").and_then(|t| t.as_str()).unwrap_or(""); + if media_type == "app.bsky.embed.images" { + Self::extract_images(media, post_uri, embed_image_data); + } else if media_type == "app.bsky.embed.video" { + Self::extract_video(media, post_uri, embed_video_data); + } + } + } + } + + fn extract_images( + embed: &serde_json::Value, + post_uri: &str, + embed_image_data: &mut Vec<(String, String, String, String)>, + ) { + if let Some(images) = embed.get("images").and_then(|i| i.as_array()) { + for (position, image) in images.iter().enumerate() { + // Get the image CID - can be in image.ref.$link (CBOR decoded) or image.ref (string) + let image_cid = image.get("image").and_then(|img| { + img.get("ref") + .and_then(|r| r.get("$link").and_then(|l| l.as_str())) + .or_else(|| img.get("ref").and_then(|r| r.as_str())) + }); + + let alt = image.get("alt").and_then(|a| a.as_str()).unwrap_or(""); + + if let Some(image_cid) = image_cid { + embed_image_data.push(( + post_uri.to_owned(), + position.to_string(), + image_cid.to_owned(), + alt.to_owned(), + )); + } + } + } + } + + fn extract_video( + embed: &serde_json::Value, + post_uri: &str, + embed_video_data: &mut Vec<(String, String, Option)>, + ) { + // Get the video CID - can be in video.ref.$link (CBOR decoded) or video.ref (string) + let video_cid = embed.get("video").and_then(|vid| { + vid.get("ref") + .and_then(|r| r.get("$link").and_then(|l| l.as_str())) + .or_else(|| vid.get("ref").and_then(|r| 
r.as_str())) + }); + + let alt = embed.get("alt").and_then(|a| a.as_str()).map(|s| s.to_owned()); + + if let Some(video_cid) = video_cid { + embed_video_data.push((post_uri.to_owned(), video_cid.to_owned(), alt)); + } + } + async fn copy_batch_insert_likes( client: &deadpool_postgres::Client, jobs: &[&ParsedJob<'_>], @@ -2747,25 +2840,115 @@ impl IndexerManager { created_at: &str, indexed_at: &str, ) -> Result<(), WintermuteError> { + let embed_type = embed.get("$type").and_then(|t| t.as_str()).unwrap_or(""); + + // Handle app.bsky.embed.images + if embed_type == "app.bsky.embed.images" { + Self::handle_embed_images(client, embed, post_uri).await?; + } + + // Handle app.bsky.embed.video + if embed_type == "app.bsky.embed.video" { + Self::handle_embed_video(client, embed, post_uri).await?; + } + // Handle app.bsky.embed.record (quote post) - if let Some(record) = embed.get("record") { - Self::handle_embed_record( - client, record, post_uri, post_cid, creator, created_at, indexed_at, - ) - .await?; + if embed_type == "app.bsky.embed.record" { + if let Some(record) = embed.get("record") { + Self::handle_embed_record( + client, record, post_uri, post_cid, creator, created_at, indexed_at, + ) + .await?; + } } // Handle app.bsky.embed.recordWithMedia (quote post with media) - if let Some(record) = embed.get("record").and_then(|r| r.get("record")) { - Self::handle_embed_record( - client, record, post_uri, post_cid, creator, created_at, indexed_at, - ) - .await?; + if embed_type == "app.bsky.embed.recordWithMedia" { + // Handle the record part (quote) + if let Some(record) = embed.get("record").and_then(|r| r.get("record")) { + Self::handle_embed_record( + client, record, post_uri, post_cid, creator, created_at, indexed_at, + ) + .await?; + } + + // Handle the media part (images or video) + if let Some(media) = embed.get("media") { + let media_type = media.get("$type").and_then(|t| t.as_str()).unwrap_or(""); + if media_type == "app.bsky.embed.images" { + 
Self::handle_embed_images(client, media, post_uri).await?; + } else if media_type == "app.bsky.embed.video" { + Self::handle_embed_video(client, media, post_uri).await?; + } + } } Ok(()) } + async fn handle_embed_images( + client: &deadpool_postgres::Client, + embed: &serde_json::Value, + post_uri: &str, + ) -> Result<(), WintermuteError> { + if let Some(images) = embed.get("images").and_then(|i| i.as_array()) { + for (position, image) in images.iter().enumerate() { + // Get the image CID - can be in image.ref.$link (CBOR decoded) or image.ref (string) + let image_cid = image + .get("image") + .and_then(|img| { + img.get("ref") + .and_then(|r| r.get("$link").and_then(|l| l.as_str())) + .or_else(|| img.get("ref").and_then(|r| r.as_str())) + }); + + let alt = image.get("alt").and_then(|a| a.as_str()).unwrap_or(""); + + if let Some(image_cid) = image_cid { + let position_str = position.to_string(); + client + .execute( + "INSERT INTO post_embed_image (\"postUri\", position, \"imageCid\", alt) + VALUES ($1, $2, $3, $4) + ON CONFLICT DO NOTHING", + &[&post_uri, &position_str, &image_cid, &alt], + ) + .await?; + } + } + } + Ok(()) + } + + async fn handle_embed_video( + client: &deadpool_postgres::Client, + embed: &serde_json::Value, + post_uri: &str, + ) -> Result<(), WintermuteError> { + // Get the video CID - can be in video.ref.$link (CBOR decoded) or video.ref (string) + let video_cid = embed + .get("video") + .and_then(|vid| { + vid.get("ref") + .and_then(|r| r.get("$link").and_then(|l| l.as_str())) + .or_else(|| vid.get("ref").and_then(|r| r.as_str())) + }); + + let alt = embed.get("alt").and_then(|a| a.as_str()); + + if let Some(video_cid) = video_cid { + client + .execute( + "INSERT INTO post_embed_video (\"postUri\", \"videoCid\", alt) + VALUES ($1, $2, $3) + ON CONFLICT DO NOTHING", + &[&post_uri, &video_cid, &alt], + ) + .await?; + } + Ok(()) + } + async fn handle_embed_record( client: &deadpool_postgres::Client, record: &serde_json::Value, From 
fd6063d4e2f0688899faef34e2d3553c36c69d43 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Sun, 18 Jan 2026 11:44:19 -0500 Subject: [PATCH 06/42] Add SQL migrations for backfilling post embed tables Two migration scripts: - backfill_post_embeds.sql: Single-shot migration for smaller datasets - backfill_post_embeds_batched.sql: Batched approach for large tables Run after deploying the indexer fix to populate post_embed_image and post_embed_video for existing posts. Co-Authored-By: Claude Opus 4.5 --- .../migrations/backfill_post_embeds.sql | 63 +++++++++ .../backfill_post_embeds_batched.sql | 131 ++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 rsky-wintermute/migrations/backfill_post_embeds.sql create mode 100644 rsky-wintermute/migrations/backfill_post_embeds_batched.sql diff --git a/rsky-wintermute/migrations/backfill_post_embeds.sql b/rsky-wintermute/migrations/backfill_post_embeds.sql new file mode 100644 index 00000000..2a5d2bde --- /dev/null +++ b/rsky-wintermute/migrations/backfill_post_embeds.sql @@ -0,0 +1,63 @@ +-- Backfill post_embed_image and post_embed_video tables from existing record JSON +-- Run this migration after deploying the indexer fix + +-- Step 1: Backfill post_embed_image from app.bsky.embed.images +INSERT INTO post_embed_image ("postUri", position, "imageCid", alt) +SELECT + r.uri as "postUri", + (img_idx.idx - 1)::text as position, + img.value->'image'->'ref'->>'$link' as "imageCid", + COALESCE(img.value->>'alt', '') as alt +FROM record r, + jsonb_array_elements((r.json::jsonb)->'embed'->'images') WITH ORDINALITY AS img(value, idx), + LATERAL (SELECT img.idx as idx) img_idx +WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.images' + AND img.value->'image'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Step 2: Backfill post_embed_image from app.bsky.embed.recordWithMedia (images in media) +INSERT INTO post_embed_image ("postUri", position, 
"imageCid", alt) +SELECT + r.uri as "postUri", + (img_idx.idx - 1)::text as position, + img.value->'image'->'ref'->>'$link' as "imageCid", + COALESCE(img.value->>'alt', '') as alt +FROM record r, + jsonb_array_elements((r.json::jsonb)->'embed'->'media'->'images') WITH ORDINALITY AS img(value, idx), + LATERAL (SELECT img.idx as idx) img_idx +WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.recordWithMedia' + AND (r.json::jsonb)->'embed'->'media'->>'$type' = 'app.bsky.embed.images' + AND img.value->'image'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Step 3: Backfill post_embed_video from app.bsky.embed.video +INSERT INTO post_embed_video ("postUri", "videoCid", alt) +SELECT + r.uri as "postUri", + (r.json::jsonb)->'embed'->'video'->'ref'->>'$link' as "videoCid", + (r.json::jsonb)->'embed'->>'alt' as alt +FROM record r +WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.video' + AND (r.json::jsonb)->'embed'->'video'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Step 4: Backfill post_embed_video from app.bsky.embed.recordWithMedia (video in media) +INSERT INTO post_embed_video ("postUri", "videoCid", alt) +SELECT + r.uri as "postUri", + (r.json::jsonb)->'embed'->'media'->'video'->'ref'->>'$link' as "videoCid", + (r.json::jsonb)->'embed'->'media'->>'alt' as alt +FROM record r +WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.recordWithMedia' + AND (r.json::jsonb)->'embed'->'media'->>'$type' = 'app.bsky.embed.video' + AND (r.json::jsonb)->'embed'->'media'->'video'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Verification queries (run these to check progress) +-- SELECT COUNT(*) FROM post_embed_image; +-- SELECT COUNT(*) FROM post_embed_video; +-- SELECT COUNT(*) FROM record WHERE uri LIKE 'at://%/app.bsky.feed.post/%' AND (json::jsonb)->'embed'->>'$type' = 
'app.bsky.embed.images'; diff --git a/rsky-wintermute/migrations/backfill_post_embeds_batched.sql b/rsky-wintermute/migrations/backfill_post_embeds_batched.sql new file mode 100644 index 00000000..7f75bc28 --- /dev/null +++ b/rsky-wintermute/migrations/backfill_post_embeds_batched.sql @@ -0,0 +1,131 @@ +-- Batched backfill for large tables (prevents long locks) +-- Run each batch separately, adjusting LIMIT/OFFSET as needed + +-- Check total counts first +SELECT + 'images' as type, + COUNT(*) as total +FROM record +WHERE uri LIKE 'at://%/app.bsky.feed.post/%' + AND ( + (json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.images' + OR ( + (json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.recordWithMedia' + AND (json::jsonb)->'embed'->'media'->>'$type' = 'app.bsky.embed.images' + ) + ) +UNION ALL +SELECT + 'videos' as type, + COUNT(*) as total +FROM record +WHERE uri LIKE 'at://%/app.bsky.feed.post/%' + AND ( + (json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.video' + OR ( + (json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.recordWithMedia' + AND (json::jsonb)->'embed'->'media'->>'$type' = 'app.bsky.embed.video' + ) + ); + +-- Batched image backfill (adjust LIMIT and run multiple times) +-- Batch 1: Direct image embeds +WITH batch AS ( + SELECT r.uri, r.json + FROM record r + LEFT JOIN post_embed_image pei ON pei."postUri" = r.uri + WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.images' + AND pei."postUri" IS NULL + LIMIT 10000 +) +INSERT INTO post_embed_image ("postUri", position, "imageCid", alt) +SELECT + b.uri as "postUri", + (img_idx.idx - 1)::text as position, + img.value->'image'->'ref'->>'$link' as "imageCid", + COALESCE(img.value->>'alt', '') as alt +FROM batch b, + jsonb_array_elements((b.json::jsonb)->'embed'->'images') WITH ORDINALITY AS img(value, idx), + LATERAL (SELECT img.idx as idx) img_idx +WHERE img.value->'image'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Batch 2: Images 
in recordWithMedia +WITH batch AS ( + SELECT r.uri, r.json + FROM record r + LEFT JOIN post_embed_image pei ON pei."postUri" = r.uri + WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.recordWithMedia' + AND (r.json::jsonb)->'embed'->'media'->>'$type' = 'app.bsky.embed.images' + AND pei."postUri" IS NULL + LIMIT 10000 +) +INSERT INTO post_embed_image ("postUri", position, "imageCid", alt) +SELECT + b.uri as "postUri", + (img_idx.idx - 1)::text as position, + img.value->'image'->'ref'->>'$link' as "imageCid", + COALESCE(img.value->>'alt', '') as alt +FROM batch b, + jsonb_array_elements((b.json::jsonb)->'embed'->'media'->'images') WITH ORDINALITY AS img(value, idx), + LATERAL (SELECT img.idx as idx) img_idx +WHERE img.value->'image'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Batched video backfill +-- Batch 1: Direct video embeds +WITH batch AS ( + SELECT r.uri, r.json + FROM record r + LEFT JOIN post_embed_video pev ON pev."postUri" = r.uri + WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.video' + AND pev."postUri" IS NULL + LIMIT 10000 +) +INSERT INTO post_embed_video ("postUri", "videoCid", alt) +SELECT + b.uri as "postUri", + (b.json::jsonb)->'embed'->'video'->'ref'->>'$link' as "videoCid", + (b.json::jsonb)->'embed'->>'alt' as alt +FROM batch b +WHERE (b.json::jsonb)->'embed'->'video'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Batch 2: Videos in recordWithMedia +WITH batch AS ( + SELECT r.uri, r.json + FROM record r + LEFT JOIN post_embed_video pev ON pev."postUri" = r.uri + WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.recordWithMedia' + AND (r.json::jsonb)->'embed'->'media'->>'$type' = 'app.bsky.embed.video' + AND pev."postUri" IS NULL + LIMIT 10000 +) +INSERT INTO post_embed_video ("postUri", "videoCid", alt) +SELECT + b.uri as "postUri", + 
(b.json::jsonb)->'embed'->'media'->'video'->'ref'->>'$link' as "videoCid", + (b.json::jsonb)->'embed'->'media'->>'alt' as alt +FROM batch b +WHERE (b.json::jsonb)->'embed'->'media'->'video'->'ref'->>'$link' IS NOT NULL +ON CONFLICT DO NOTHING; + +-- Check remaining after each batch +SELECT + 'remaining_images' as metric, + COUNT(*) as count +FROM record r +LEFT JOIN post_embed_image pei ON pei."postUri" = r.uri +WHERE r.uri LIKE 'at://%/app.bsky.feed.post/%' + AND ( + (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.images' + OR ( + (r.json::jsonb)->'embed'->>'$type' = 'app.bsky.embed.recordWithMedia' + AND (r.json::jsonb)->'embed'->'media'->>'$type' = 'app.bsky.embed.images' + ) + ) + AND pei."postUri" IS NULL; From 7b1c2ae388483502f8769d933ff2ea4cd0d8a562 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 09:03:47 -0500 Subject: [PATCH 07/42] Fix duplicate notifications and add ON CONFLICT handling - Add ON CONFLICT DO NOTHING to all notification INSERT statements: - like notifications (was missing) - follow notifications (was missing) - repost notifications (was missing) - starterpack-joined notifications (was missing) - reply and quote already had it - Add migration scripts: - dedupe_notifications.sql: Full deduplication for large tables - add_notification_unique_constraint.sql: Adds unique constraint - Fix clippy warnings: - Use ToOwned::to_owned instead of closure - Use map_or_else instead of match for option handling --- .../add_notification_unique_constraint.sql | 25 +++++++++++ .../migrations/dedupe_notifications.sql | 38 +++++++++++++++++ rsky-wintermute/src/indexer/bulk.rs | 21 +++++----- rsky-wintermute/src/indexer/mod.rs | 41 ++++++++++--------- 4 files changed, 96 insertions(+), 29 deletions(-) create mode 100644 rsky-wintermute/migrations/add_notification_unique_constraint.sql create mode 100644 rsky-wintermute/migrations/dedupe_notifications.sql diff --git a/rsky-wintermute/migrations/add_notification_unique_constraint.sql 
b/rsky-wintermute/migrations/add_notification_unique_constraint.sql new file mode 100644 index 00000000..16cf109b --- /dev/null +++ b/rsky-wintermute/migrations/add_notification_unique_constraint.sql @@ -0,0 +1,25 @@ +-- Add unique constraint to notification table to prevent duplicates +-- This migration should be run AFTER deduplication (see dedupe_notifications.sql) +-- Or on a database with few/no duplicates + +-- Step 1: Delete duplicates, keeping the row with the lowest id +-- This uses a subquery with ROW_NUMBER to identify duplicates +DELETE FROM bsky.notification +WHERE id IN ( + SELECT id FROM ( + SELECT id, + ROW_NUMBER() OVER (PARTITION BY did, "recordUri", reason ORDER BY id) as rn + FROM bsky.notification + ) sub + WHERE rn > 1 +); + +-- Step 2: Add the unique index that enforces the constraint +-- Using CONCURRENTLY to avoid blocking writes (must be run outside a transaction block) +CREATE UNIQUE INDEX CONCURRENTLY IF NOT EXISTS notification_did_recorduri_reason_unique_idx +ON bsky.notification (did, "recordUri", reason); + +-- Verify the index was created +SELECT indexname, indexdef +FROM pg_indexes +WHERE schemaname = 'bsky' AND tablename = 'notification'; diff --git a/rsky-wintermute/migrations/dedupe_notifications.sql b/rsky-wintermute/migrations/dedupe_notifications.sql new file mode 100644 index 00000000..2eed99c4 --- /dev/null +++ b/rsky-wintermute/migrations/dedupe_notifications.sql @@ -0,0 +1,38 @@ +-- Deduplicate notifications and add unique constraint +-- WARNING: This migration operates on 1+ billion rows and will take many hours + +-- Step 1: Create a new (regular, not TEMPORARY) table with distinct notifications +-- Using ROW_NUMBER() to keep only the first occurrence (lowest id) of each duplicate +CREATE TABLE bsky.notification_deduped AS +SELECT id, did, "recordUri", "recordCid", author, reason, "reasonSubject", "sortAt" +FROM ( + SELECT *, + ROW_NUMBER() OVER (PARTITION BY did, "recordUri", reason ORDER BY id) as rn + FROM bsky.notification +) sub +WHERE rn = 1; + +-- Step 2: Create indexes on the new table
(before swap to minimize downtime) +CREATE INDEX notification_deduped_did_sortat_idx ON bsky.notification_deduped (did, "sortAt"); +ALTER TABLE bsky.notification_deduped ADD PRIMARY KEY (id); + +-- Step 3: Add the unique constraint +CREATE UNIQUE INDEX notification_deduped_unique_idx +ON bsky.notification_deduped (did, "recordUri", reason); + +-- Step 4: Swap the tables +-- IMPORTANT: Do this during a maintenance window +BEGIN; +ALTER TABLE bsky.notification RENAME TO notification_old; +ALTER TABLE bsky.notification_deduped RENAME TO notification; +-- Update the sequence to continue from the max id. NOTE(review): CREATE TABLE AS does not copy column defaults, so the new table's id presumably has no DEFAULT nextval('bsky.notification_id_seq') and the sequence likely stays owned by notification_old.id - restore both before inserting new rows or running Step 5 +SELECT setval('bsky.notification_id_seq', (SELECT MAX(id) FROM bsky.notification)); +COMMIT; + +-- Step 5: Drop the old table (after verifying everything works) +-- DROP TABLE bsky.notification_old; + +-- Verification queries: +-- SELECT COUNT(*) FROM bsky.notification; +-- SELECT COUNT(*) FROM bsky.notification_old; +-- SELECT (SELECT COUNT(*) FROM bsky.notification_old) - (SELECT COUNT(*) FROM bsky.notification) as duplicates_removed; diff --git a/rsky-wintermute/src/indexer/bulk.rs b/rsky-wintermute/src/indexer/bulk.rs index ac521c7f..1b6f80e8 100644 --- a/rsky-wintermute/src/indexer/bulk.rs +++ b/rsky-wintermute/src/indexer/bulk.rs @@ -874,16 +874,17 @@ pub async fn copy_insert_post_embed_videos( let mut buffer = Vec::with_capacity(data.len() * 150); for (post_uri, video_cid, alt) in data { - let escaped_alt = match alt { - Some(a) => a - .chars() - .map(|c| match c { - '\t' | '\n' | '\r' => ' ', - _ => c, - }) - .collect::<String>(), - None => "\\N".to_owned(), // PostgreSQL NULL marker - }; + let escaped_alt = alt.as_ref().map_or_else( + || "\\N".to_owned(), // PostgreSQL NULL marker + |a| { + a.chars() + .map(|c| match c { + '\t' | '\n' | '\r' => ' ', + _ => c, + }) + .collect::<String>() + }, + ); writeln!(buffer, "{post_uri}\t{video_cid}\t{escaped_alt}") .map_err(|e| WintermuteError::Other(format!("buffer write error: {e}")))?; } diff --git a/rsky-wintermute/src/indexer/mod.rs
b/rsky-wintermute/src/indexer/mod.rs index bf2ec8c0..db2afe89 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2482,7 +2482,10 @@ impl IndexerManager { .or_else(|| vid.get("ref").and_then(|r| r.as_str())) }); - let alt = embed.get("alt").and_then(|a| a.as_str()).map(|s| s.to_owned()); + let alt = embed + .get("alt") + .and_then(|a| a.as_str()) + .map(ToOwned::to_owned); if let Some(video_cid) = video_cid { embed_video_data.push((post_uri.to_owned(), video_cid.to_owned(), alt)); @@ -2894,13 +2897,11 @@ impl IndexerManager { if let Some(images) = embed.get("images").and_then(|i| i.as_array()) { for (position, image) in images.iter().enumerate() { // Get the image CID - can be in image.ref.$link (CBOR decoded) or image.ref (string) - let image_cid = image - .get("image") - .and_then(|img| { - img.get("ref") - .and_then(|r| r.get("$link").and_then(|l| l.as_str())) - .or_else(|| img.get("ref").and_then(|r| r.as_str())) - }); + let image_cid = image.get("image").and_then(|img| { + img.get("ref") + .and_then(|r| r.get("$link").and_then(|l| l.as_str())) + .or_else(|| img.get("ref").and_then(|r| r.as_str())) + }); let alt = image.get("alt").and_then(|a| a.as_str()).unwrap_or(""); @@ -2926,13 +2927,11 @@ impl IndexerManager { post_uri: &str, ) -> Result<(), WintermuteError> { // Get the video CID - can be in video.ref.$link (CBOR decoded) or video.ref (string) - let video_cid = embed - .get("video") - .and_then(|vid| { - vid.get("ref") - .and_then(|r| r.get("$link").and_then(|l| l.as_str())) - .or_else(|| vid.get("ref").and_then(|r| r.as_str())) - }); + let video_cid = embed.get("video").and_then(|vid| { + vid.get("ref") + .and_then(|r| r.get("$link").and_then(|l| l.as_str())) + .or_else(|| vid.get("ref").and_then(|r| r.as_str())) + }); let alt = embed.get("alt").and_then(|a| a.as_str()); @@ -3074,7 +3073,8 @@ impl IndexerManager { client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, 
\"reasonSubject\", \"sortAt\") - VALUES ($1, $2, $3, $4, $5, $6, $7)", + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT DO NOTHING", &[&subject_author, &did, &uri, &cid, &"like", &Some(subject), &indexed_at], ) .await?; @@ -3145,7 +3145,8 @@ impl IndexerManager { client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") - VALUES ($1, $2, $3, $4, $5, $6, $7)", + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT DO NOTHING", &[&subject, &did, &uri, &cid, &"follow", &None::, &indexed_at], ) .await?; @@ -3245,7 +3246,8 @@ impl IndexerManager { client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") - VALUES ($1, $2, $3, $4, $5, $6, $7)", + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT DO NOTHING", &[&subject_author, &did, &uri, &cid, &"repost", &Some(subject), &indexed_at], ) .await?; @@ -3384,7 +3386,8 @@ impl IndexerManager { client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") - VALUES ($1, $2, $3, $4, $5, $6, $7)", + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT DO NOTHING", &[&starter_pack_author, &did, &uri, &cid, &"starterpack-joined", &Some(starter_pack_uri_str), &indexed_at], ) .await?; From e3c24bdeb24786c7b92fdf8fb68ec547d4fd3ff5 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 14:39:33 -0500 Subject: [PATCH 08/42] Add rsky-video service for Blacksky video hosting Implements app.bsky.video.* lexicon endpoints using Bunny Stream for video transcoding and CDN delivery. 
Features: - getUploadLimits: Check user quotas - uploadVideo: Upload video to Bunny Stream - getJobStatus: Poll transcoding status - Bunny webhook handler for encoding completion - URL proxy mapping did/cid to Bunny video IDs Configuration: - BUNNY_LIBRARY_ID, BUNNY_API_KEY, BUNNY_PULL_ZONE - VIDEO_SERVICE_DID, VIDEO_PUBLIC_URL - DATABASE_URL for job tracking --- Cargo.lock | 197 ++++++++++++++- Cargo.toml | 1 + rsky-video/Cargo.toml | 58 +++++ rsky-video/src/auth/mod.rs | 124 ++++++++++ rsky-video/src/bunny/mod.rs | 177 ++++++++++++++ rsky-video/src/bunny/types.rs | 135 +++++++++++ rsky-video/src/config.rs | 76 ++++++ rsky-video/src/error.rs | 103 ++++++++ rsky-video/src/main.rs | 131 ++++++++++ rsky-video/src/xrpc/mod.rs | 435 ++++++++++++++++++++++++++++++++++ 10 files changed, 1433 insertions(+), 4 deletions(-) create mode 100644 rsky-video/Cargo.toml create mode 100644 rsky-video/src/auth/mod.rs create mode 100644 rsky-video/src/bunny/mod.rs create mode 100644 rsky-video/src/bunny/types.rs create mode 100644 rsky-video/src/config.rs create mode 100644 rsky-video/src/error.rs create mode 100644 rsky-video/src/main.rs create mode 100644 rsky-video/src/xrpc/mod.rs diff --git a/Cargo.lock b/Cargo.lock index a82a28fc..6cd93d23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -778,7 +778,7 @@ dependencies = [ "rustls-pki-types", "tokio", "tokio-rustls 0.26.2", - "tower", + "tower 0.5.2", "tracing", ] @@ -900,6 +900,74 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "axum-macros", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.7.0", + "hyper-util", + "itoa 1.0.15", + "matchit", + "memchr", + "mime", + "multer", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", 
+ "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper 1.0.2", + "tokio", + "tower 0.5.2", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-macros" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -5039,6 +5107,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonwebtoken" +version = "9.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +dependencies = [ + "base64 0.22.1", + "js-sys", + "pem", + "ring", + "serde", + "serde_json", + "simple_asn1", +] + [[package]] name = "jwt-simple" version = "0.12.12" @@ -5533,6 +5616,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "maybe-rayon" version = "0.1.1" @@ -6574,6 +6663,16 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -6890,6 +6989,7 @@ dependencies = [ "postgres-protocol", "serde", "serde_json", + "uuid 1.18.0", ] [[package]] @@ -7575,8 +7675,8 @@ dependencies = [ "tokio-native-tls", "tokio-rustls 0.26.2", "tokio-util", - "tower", - "tower-http", + "tower 0.5.2", + "tower-http 0.6.6", "tower-service", "url", "wasm-bindgen", @@ -8201,6 +8301,37 @@ dependencies = [ "url", ] +[[package]] +name = "rsky-video" +version = "0.1.0" +dependencies = [ + "axum", + "base64 0.22.1", + "bytes", + "chrono", + "color-eyre", + "deadpool-postgres", + "futures", + "jsonwebtoken", + "mockito", + "prometheus", + "reqwest 0.12.23", + "rsky-syntax", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.16", + "tokio", + "tokio-postgres", + "tower 0.4.13", + "tower-http 0.5.2", + "tracing", + "tracing-subscriber", + "url", + "urlencoding", + "uuid 1.18.0", +] + [[package]] name = "rsky-wintermute" version = "0.1.0" @@ -8772,6 +8903,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa 1.0.15", + "serde", + "serde_core", +] + [[package]] name = "serde_qs" version = "0.12.0" @@ -9052,6 +9194,18 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" +[[package]] +name = "simple_asn1" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" +dependencies = [ + "num-bigint", + "num-traits", + "thiserror 2.0.16", + "time", +] + [[package]] name = "siphasher" version = "0.3.11" @@ -10000,6 +10154,21 @@ 
version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d163a63c116ce562a22cda521fcc4d79152e7aba014456fb5eb442f6d6a10109" +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.5.2" @@ -10013,6 +10182,24 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +dependencies = [ + "bitflags 2.9.2", + "bytes", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", ] [[package]] @@ -10028,7 +10215,7 @@ dependencies = [ "http-body 1.0.1", "iri-string", "pin-project-lite", - "tower", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -10051,6 +10238,7 @@ version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -10468,6 +10656,7 @@ checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be" dependencies = [ "getrandom 0.3.3", "js-sys", + "serde", "wasm-bindgen", ] diff --git a/Cargo.toml b/Cargo.toml index a030ee51..83674550 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ members = [ "rsky-repo", "rsky-satnav", "rsky-syntax", + "rsky-video", "rsky-wintermute", ] resolver = "2" diff --git a/rsky-video/Cargo.toml b/rsky-video/Cargo.toml new file mode 100644 index 00000000..be4a0310 --- /dev/null +++ 
b/rsky-video/Cargo.toml @@ -0,0 +1,58 @@ +[package] +name = "rsky-video" +version = "0.1.0" +edition = "2024" +authors = ["Rudy Fraser "] +description = "Blacksky video service - handles video uploads, transcoding via Bunny Stream, and playback" + +[[bin]] +name = "video-service" +path = "src/main.rs" + +[dependencies] +# Async runtime +tokio = { workspace = true } +futures = { version = "0.3", default-features = false, features = ["std"] } + +# Web framework +axum = { version = "0.7", features = ["macros", "multipart"] } +tower = { version = "0.4", features = ["util"] } +tower-http = { version = "0.5", features = ["cors", "trace"] } + +# HTTP client for Bunny API +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls-webpki-roots-no-provider", "stream"] } + +# Serialization +serde = { workspace = true } +serde_json = { workspace = true } + +# Database +tokio-postgres = { version = "0.7", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } +deadpool-postgres = "0.13" + +# Auth / crypto +jsonwebtoken = "9" +base64 = "0.22" + +# Utilities +chrono = { version = "0.4", default-features = false, features = ["serde", "clock"] } +uuid = { version = "1", features = ["v4", "serde"] } +thiserror = "2" +color-eyre = "0.6" +url = "2" +bytes = "1" +urlencoding = "2" + +# Logging/tracing +tracing = { version = "0.1", features = ["release_max_level_debug"] } +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +# Metrics +prometheus = { version = "0.13", features = ["process"] } + +# rsky crates +rsky-syntax = { workspace = true } + +[dev-dependencies] +mockito = "1.7.0" +tempfile = "3" diff --git a/rsky-video/src/auth/mod.rs b/rsky-video/src/auth/mod.rs new file mode 100644 index 00000000..9bb3790e --- /dev/null +++ b/rsky-video/src/auth/mod.rs @@ -0,0 +1,124 @@ +//! Service authentication handling +//! +//! Validates service auth tokens from AT Protocol clients. +//! For MVP, we do basic JWT validation. 
Full validation would verify +//! the signature against the PDS's signing key. + +use base64::{Engine, engine::general_purpose::URL_SAFE_NO_PAD}; +use serde::{Deserialize, Serialize}; +use tracing::{debug, warn}; + +use crate::error::{Error, Result}; + +/// Decoded service auth token claims +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ServiceAuthClaims { + /// Issuer (the PDS DID) + pub iss: String, + /// Audience (should be the video service DID) + pub aud: String, + /// Subject (the user's DID) + pub sub: String, + /// Lexicon method being authorized + pub lxm: Option<String>, + /// Expiration time (Unix timestamp) + pub exp: i64, + /// Issued at time (Unix timestamp) + pub iat: Option<i64>, +} + +/// Extract the bearer token from the Authorization header +pub fn extract_auth_header(auth_header: Option<&str>) -> Result<String> { + let header = auth_header + .ok_or_else(|| Error::Unauthorized("Missing Authorization header".to_string()))?; + + if !header.starts_with("Bearer ") { + return Err(Error::Unauthorized( + "Invalid Authorization header format".to_string(), + )); + } + + Ok(header[7..].to_string()) +} + +/// Decode and validate a service auth JWT (basic validation) +/// +/// For MVP, this does: +/// - Decode the JWT payload +/// - Check expiration +/// - Validate audience matches our service DID +/// +/// Full implementation would also: +/// - Resolve the issuer's signing key from their PDS +/// - Verify the JWT signature +pub fn validate_service_auth( + token: &str, + expected_aud: &str, + expected_lxm: Option<&str>, +) -> Result<ServiceAuthClaims> { + // Split JWT into parts + let parts: Vec<&str> = token.split('.').collect(); + if parts.len() != 3 { + return Err(Error::Unauthorized("Invalid JWT format".to_string())); + } + + // Decode the payload (middle part) + let payload_bytes = URL_SAFE_NO_PAD + .decode(parts[1]) + .map_err(|e| Error::Unauthorized(format!("Failed to decode JWT payload: {}", e)))?; + + let claims: ServiceAuthClaims = serde_json::from_slice(&payload_bytes) +
.map_err(|e| Error::Unauthorized(format!("Failed to parse JWT claims: {}", e)))?; + + debug!( + "Service auth: iss={}, sub={}, aud={}, lxm={:?}", + claims.iss, claims.sub, claims.aud, claims.lxm + ); + + // Check expiration + let now = chrono::Utc::now().timestamp(); + if claims.exp < now { + return Err(Error::Unauthorized("Token has expired".to_string())); + } + + // Validate audience + if claims.aud != expected_aud { + warn!( + "Invalid audience: expected {}, got {}", + expected_aud, claims.aud + ); + return Err(Error::Unauthorized("Invalid token audience".to_string())); + } + + // Validate lexicon method if expected + if let Some(expected) = expected_lxm { + if claims.lxm.as_deref() != Some(expected) { + warn!("Invalid lxm: expected {}, got {:?}", expected, claims.lxm); + return Err(Error::Unauthorized("Invalid token scope".to_string())); + } + } + + Ok(claims) +} + +/// Extract the user DID from an Authorization header +pub fn get_user_did(auth_header: Option<&str>, service_did: &str) -> Result { + let token = extract_auth_header(auth_header)?; + let claims = validate_service_auth(&token, service_did, None)?; + Ok(claims.sub) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_auth_header() { + assert!(extract_auth_header(None).is_err()); + assert!(extract_auth_header(Some("Basic xyz")).is_err()); + assert_eq!( + extract_auth_header(Some("Bearer mytoken")).unwrap(), + "mytoken" + ); + } +} diff --git a/rsky-video/src/bunny/mod.rs b/rsky-video/src/bunny/mod.rs new file mode 100644 index 00000000..ef0ecc11 --- /dev/null +++ b/rsky-video/src/bunny/mod.rs @@ -0,0 +1,177 @@ +//! 
Bunny Stream API client + +mod types; + +pub use types::*; + +use crate::error::{Error, Result}; +use bytes::Bytes; +use tracing::{debug, info}; + +const BUNNY_API_BASE: &str = "https://video.bunnycdn.com"; + +/// Client for interacting with Bunny Stream API +#[derive(Debug, Clone)] +pub struct BunnyClient { + library_id: String, + api_key: String, + pull_zone: String, + client: reqwest::Client, +} + +impl BunnyClient { + pub fn new(library_id: String, api_key: String, pull_zone: String) -> Self { + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(300)) + .build() + .expect("Failed to create HTTP client"); + + Self { + library_id, + api_key, + pull_zone, + client, + } + } + + /// Create a new video object in Bunny Stream + /// Returns the video GUID that can be used for uploading + pub async fn create_video(&self, title: &str) -> Result { + let url = format!("{}/library/{}/videos", BUNNY_API_BASE, self.library_id); + + debug!("Creating video in Bunny: {}", title); + + let response = self + .client + .post(&url) + .header("AccessKey", &self.api_key) + .header("Content-Type", "application/json") + .json(&serde_json::json!({ + "title": title + })) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(Error::BunnyApi(format!( + "Failed to create video: {} - {}", + status, body + ))); + } + + let video: CreateVideoResponse = response.json().await?; + info!("Created Bunny video: {}", video.guid); + Ok(video) + } + + /// Upload video binary data to Bunny Stream + pub async fn upload_video(&self, video_id: &str, data: Bytes) -> Result<()> { + let url = format!( + "{}/library/{}/videos/{}", + BUNNY_API_BASE, self.library_id, video_id + ); + + debug!( + "Uploading {} bytes to Bunny video: {}", + data.len(), + video_id + ); + + let response = self + .client + .put(&url) + .header("AccessKey", &self.api_key) + .header("Content-Type", 
"application/octet-stream") + .body(data) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(Error::BunnyApi(format!( + "Failed to upload video: {} - {}", + status, body + ))); + } + + info!("Uploaded video to Bunny: {}", video_id); + Ok(()) + } + + /// Get video status from Bunny Stream + pub async fn get_video(&self, video_id: &str) -> Result { + let url = format!( + "{}/library/{}/videos/{}", + BUNNY_API_BASE, self.library_id, video_id + ); + + let response = self + .client + .get(&url) + .header("AccessKey", &self.api_key) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(Error::BunnyApi(format!( + "Failed to get video: {} - {}", + status, body + ))); + } + + Ok(response.json().await?) + } + + /// Delete a video from Bunny Stream + pub async fn delete_video(&self, video_id: &str) -> Result<()> { + let url = format!( + "{}/library/{}/videos/{}", + BUNNY_API_BASE, self.library_id, video_id + ); + + let response = self + .client + .delete(&url) + .header("AccessKey", &self.api_key) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(Error::BunnyApi(format!( + "Failed to delete video: {} - {}", + status, body + ))); + } + + info!("Deleted Bunny video: {}", video_id); + Ok(()) + } + + /// Get the HLS playlist URL for a video + pub fn get_playlist_url(&self, video_id: &str) -> String { + format!( + "https://{}.b-cdn.net/{}/playlist.m3u8", + self.pull_zone, video_id + ) + } + + /// Get the thumbnail URL for a video + pub fn get_thumbnail_url(&self, video_id: &str) -> String { + format!( + "https://{}.b-cdn.net/{}/thumbnail.jpg", + self.pull_zone, video_id + ) + } + + /// Get the pull zone hostname + pub fn pull_zone(&self) -> &str { 
+ &self.pull_zone + } +} diff --git a/rsky-video/src/bunny/types.rs b/rsky-video/src/bunny/types.rs new file mode 100644 index 00000000..705946d4 --- /dev/null +++ b/rsky-video/src/bunny/types.rs @@ -0,0 +1,135 @@ +//! Bunny Stream API types + +use serde::{Deserialize, Serialize}; + +/// Response from creating a new video +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct CreateVideoResponse { + /// Unique video identifier (GUID) + pub guid: String, + /// Video title + pub title: Option<String>, + /// Library ID the video belongs to + pub video_library_id: i64, +} + +/// Video information from Bunny Stream +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct VideoInfo { + /// Unique video identifier + pub guid: String, + /// Video title + pub title: Option<String>, + /// Video library ID + pub video_library_id: i64, + /// Encoding status (0-10) + pub status: i32, + /// Video duration in seconds + #[serde(default)] + pub length: f64, + /// Video width + #[serde(default)] + pub width: i32, + /// Video height + #[serde(default)] + pub height: i32, + /// File size in bytes + #[serde(default)] + pub storage_size: i64, + /// Thumbnail filename + pub thumbnail_file_name: Option<String>, + /// Encoding progress as a percentage + #[serde(default)] + pub encode_progress: i32, + /// Available resolutions + #[serde(default)] + pub available_resolutions: Option<String>, +} + +impl VideoInfo { + /// Check if encoding is complete (status 3 or 4) + pub fn is_encoding_complete(&self) -> bool { + self.status == 3 || self.status == 4 + } + + /// Check if encoding failed (status 5) + pub fn is_encoding_failed(&self) -> bool { + self.status == 5 + } + + /// Get encoding progress as percentage + pub fn encoding_progress(&self) -> i32 { + self.encode_progress + } +} + +/// Webhook payload from Bunny Stream +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "PascalCase")] +pub struct WebhookPayload { + /// Video library ID + pub video_library_id:
i64,
+    /// Video GUID
+    pub video_guid: String,
+    /// Status code (0-10)
+    /// 0 = Queued, 1 = Processing, 2 = Encoding, 3 = Finished
+    /// 4 = Resolution Finished, 5 = Failed
+    /// 6 = PresignedUploadStarted, 7 = PresignedUploadFinished
+    /// 8 = PresignedUploadFailed, 9 = CaptionsGenerated
+    /// 10 = TitleOrDescriptionGenerated
+    pub status: i32,
+}
+
+impl WebhookPayload {
+    /// Check if encoding is complete
+    pub fn is_finished(&self) -> bool {
+        self.status == BunnyStatus::Finished as i32
+    }
+
+    /// Check if a resolution finished (video playable)
+    pub fn is_resolution_finished(&self) -> bool {
+        self.status == BunnyStatus::ResolutionFinished as i32
+    }
+
+    /// Check if encoding failed
+    pub fn is_failed(&self) -> bool {
+        self.status == BunnyStatus::Failed as i32
+    }
+
+    /// Get human-readable status; codes outside 0-10 yield `"Unknown"`
+    pub fn status_name(&self) -> &'static str {
+        BunnyStatus::from_code(self.status).map_or("Unknown", BunnyStatus::name)
+    }
+}
+
+/// Bunny encoding status codes
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(i32)]
+pub enum BunnyStatus {
+    Queued = 0,
+    Processing = 1,
+    Encoding = 2,
+    Finished = 3,
+    ResolutionFinished = 4,
+    Failed = 5,
+    PresignedUploadStarted = 6,
+    PresignedUploadFinished = 7,
+    PresignedUploadFailed = 8,
+    CaptionsGenerated = 9,
+    TitleOrDescriptionGenerated = 10,
+}
+
+impl BunnyStatus {
+    /// Map a raw status code to its variant; `None` for codes outside 0-10.
+    pub fn from_code(code: i32) -> Option<Self> {
+        match code {
+            0 => Some(Self::Queued),
+            1 => Some(Self::Processing),
+            2 => Some(Self::Encoding),
+            3 => Some(Self::Finished),
+            4 => Some(Self::ResolutionFinished),
+            5 => Some(Self::Failed),
+            6 => Some(Self::PresignedUploadStarted),
+            7 => Some(Self::PresignedUploadFinished),
+            8 => Some(Self::PresignedUploadFailed),
+            9 => Some(Self::CaptionsGenerated),
+            10 => Some(Self::TitleOrDescriptionGenerated),
+            _ => None,
+        }
+    }
+
+    /// Human-readable name of the status (same spelling as the variant).
+    pub fn name(self) -> &'static str {
+        match self {
+            Self::Queued => "Queued",
+            Self::Processing => "Processing",
+            Self::Encoding => "Encoding",
+            Self::Finished => "Finished",
+            Self::ResolutionFinished => "ResolutionFinished",
+            Self::Failed => "Failed",
+            Self::PresignedUploadStarted => "PresignedUploadStarted",
+            Self::PresignedUploadFinished => "PresignedUploadFinished",
+            Self::PresignedUploadFailed => "PresignedUploadFailed",
+            Self::CaptionsGenerated => "CaptionsGenerated",
+            Self::TitleOrDescriptionGenerated => "TitleOrDescriptionGenerated",
+        }
+    }
+}
diff --git a/rsky-video/src/config.rs b/rsky-video/src/config.rs
new file mode 100644
index 00000000..859e421b
--- /dev/null
+++ b/rsky-video/src/config.rs
@@ -0,0 +1,76 @@
+//! 
Configuration for the video service
+
+use color_eyre::Result;
+use std::env;
+
+/// Application configuration loaded from environment variables
+#[derive(Debug, Clone)]
+pub struct AppConfig {
+    /// Host to bind to
+    pub host: String,
+    /// Port to listen on
+    pub port: u16,
+    /// Database connection URL
+    pub database_url: String,
+
+    /// Bunny Stream Library ID
+    pub bunny_library_id: String,
+    /// Bunny Stream API Key
+    pub bunny_api_key: String,
+    /// Bunny Pull Zone hostname (e.g., "blacksky-video.b-cdn.net")
+    pub bunny_pull_zone: String,
+
+    /// This service's DID (e.g., "did:web:video.blacksky.community")
+    pub service_did: String,
+    /// Public URL of this service
+    pub public_url: String,
+
+    /// Maximum video file size in bytes (default: 100MB)
+    pub max_video_size: u64,
+    /// Maximum video duration in seconds (default: 90)
+    pub max_video_duration: u32,
+    /// Daily video upload limit per user
+    pub daily_video_limit: u32,
+    /// Daily byte upload limit per user (default: 10GB)
+    pub daily_byte_limit: u64,
+}
+
+/// Read a mandatory environment variable, naming it in the error when it is unavailable.
+fn required_var(name: &str) -> Result<String> {
+    env::var(name).map_err(|err| color_eyre::eyre::eyre!("{} must be set: {}", name, err))
+}
+
+/// Read and parse an optional environment variable, falling back to `default`
+/// when it is unset or its value does not parse.
+fn parsed_var_or<T: std::str::FromStr>(name: &str, default: T) -> T {
+    env::var(name)
+        .ok()
+        .and_then(|raw| raw.parse().ok())
+        .unwrap_or(default)
+}
+
+impl AppConfig {
+    /// Load configuration from environment variables.
+    ///
+    /// # Errors
+    /// Returns an error (instead of panicking) when `DATABASE_URL`,
+    /// `BUNNY_LIBRARY_ID`, `BUNNY_API_KEY` or `BUNNY_PULL_ZONE` cannot be read.
+    pub fn from_env() -> Result<Self> {
+        Ok(Self {
+            host: env::var("VIDEO_HOST").unwrap_or_else(|_| "0.0.0.0".to_string()),
+            port: parsed_var_or("VIDEO_PORT", 3500),
+            database_url: required_var("DATABASE_URL")?,
+
+            bunny_library_id: required_var("BUNNY_LIBRARY_ID")?,
+            bunny_api_key: required_var("BUNNY_API_KEY")?,
+            bunny_pull_zone: required_var("BUNNY_PULL_ZONE")?,
+
+            service_did: env::var("VIDEO_SERVICE_DID")
+                .unwrap_or_else(|_| "did:web:video.blacksky.community".to_string()),
+            public_url: env::var("VIDEO_PUBLIC_URL")
+                .unwrap_or_else(|_| "https://video.blacksky.community".to_string()),
+
+            max_video_size: parsed_var_or("MAX_VIDEO_SIZE", 100_000_000), // 100MB
+            max_video_duration: parsed_var_or("MAX_VIDEO_DURATION", 90),  // 90 seconds
+            daily_video_limit: parsed_var_or("DAILY_VIDEO_LIMIT", 25),
+            daily_byte_limit: parsed_var_or("DAILY_BYTE_LIMIT", 10_737_418_240), // 10GB
+        })
+    }
+}
diff --git a/rsky-video/src/error.rs b/rsky-video/src/error.rs
new file mode 100644
index 00000000..6dde9bfd
--- /dev/null
+++ b/rsky-video/src/error.rs
@@ -0,0 +1,103 @@
+//! Error types for the video service
+
+use axum::{
+    Json,
+    http::StatusCode,
+    response::{IntoResponse, Response},
+};
+use serde_json::json;
+
+/// Crate-wide result alias using [`Error`] as the error type
+pub type Result<T> = std::result::Result<T, Error>;
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Unauthorized: {0}")]
+    Unauthorized(String),
+
+    #[error("Forbidden: {0}")]
+    Forbidden(String),
+
+    #[error("Not found: {0}")]
+    NotFound(String),
+
+    #[error("Bad request: {0}")]
+    BadRequest(String),
+
+    #[error("Rate limited: {0}")]
+    RateLimited(String),
+
+    #[error("Upload limit exceeded: {0}")]
+    UploadLimitExceeded(String),
+
+    #[error("Video too large: {0}")]
+    VideoTooLarge(String),
+
+    #[error("Internal error: {0}")]
+    Internal(String),
+
+    #[error("Database error: {0}")]
+    Database(#[from] tokio_postgres::Error),
+
+    #[error("Pool error: {0}")]
+    Pool(#[from] deadpool_postgres::PoolError),
+
+    #[error("Bunny API error: {0}")]
+    BunnyApi(String),
+
+    #[error("HTTP error: {0}")]
+    Http(#[from] reqwest::Error),
+
+    #[error("JSON error: {0}")]
+    Json(#[from] serde_json::Error),
+}
+
+impl IntoResponse for Error {
+    fn into_response(self) -> Response {
+        let (status, error_message) = match &self {
+            Error::Unauthorized(msg) => (StatusCode::UNAUTHORIZED, msg.clone()),
+            Error::Forbidden(msg) => (StatusCode::FORBIDDEN, msg.clone()),
+            Error::NotFound(msg) => (StatusCode::NOT_FOUND, msg.clone()),
+            Error::BadRequest(msg) => (StatusCode::BAD_REQUEST, msg.clone()),
+            
Error::RateLimited(msg) => (StatusCode::TOO_MANY_REQUESTS, msg.clone()),
+            Error::UploadLimitExceeded(msg) => (StatusCode::TOO_MANY_REQUESTS, msg.clone()),
+            Error::VideoTooLarge(msg) => (StatusCode::PAYLOAD_TOO_LARGE, msg.clone()),
+            Error::Internal(msg) => (StatusCode::INTERNAL_SERVER_ERROR, msg.clone()),
+            Error::Database(e) => {
+                tracing::error!("Database error: {}", e);
+                (
+                    StatusCode::INTERNAL_SERVER_ERROR,
+                    "Database error".to_string(),
+                )
+            }
+            Error::Pool(e) => {
+                tracing::error!("Pool error: {}", e);
+                (
+                    StatusCode::INTERNAL_SERVER_ERROR,
+                    "Database pool error".to_string(),
+                )
+            }
+            Error::BunnyApi(msg) => {
+                // Upstream detail goes to the log only, matching the other
+                // backend errors; it is not echoed to the client.
+                tracing::error!("Bunny API error: {}", msg);
+                (StatusCode::BAD_GATEWAY, "Video service error".to_string())
+            }
+            Error::Http(e) => {
+                tracing::error!("HTTP error: {}", e);
+                (StatusCode::BAD_GATEWAY, "HTTP request failed".to_string())
+            }
+            Error::Json(e) => {
+                tracing::error!("JSON error: {}", e);
+                (StatusCode::BAD_REQUEST, "Invalid JSON".to_string())
+            }
+        };
+
+        // Both keys carry the same client-facing text.
+        let body = Json(json!({
+            "error": error_message,
+            "message": error_message,
+        }));
+
+        (status, body).into_response()
+    }
+}
diff --git a/rsky-video/src/main.rs b/rsky-video/src/main.rs
new file mode 100644
index 00000000..3aa0b3a1
--- /dev/null
+++ b/rsky-video/src/main.rs
@@ -0,0 +1,131 @@
+//! Blacksky Video Service
+//!
+//! Handles video uploads, transcoding via Bunny Stream, and playback URL proxying.
+//! Implements the app.bsky.video.* lexicon endpoints.
+
+use std::net::SocketAddr;
+use std::sync::Arc;
+
+use axum::{
+    Router,
+    routing::{get, post},
+};
+use deadpool_postgres::{Config as PgConfig, Runtime};
+use tokio_postgres::NoTls;
+use tower_http::cors::{Any, CorsLayer};
+use tower_http::trace::TraceLayer;
+use tracing::{Level, info};
+use tracing_subscriber::{EnvFilter, fmt, prelude::*};
+
+mod auth;
+mod bunny;
+mod config;
+mod db;
+mod error;
+mod xrpc;
+
+pub use config::AppConfig;
+pub use error::{Error, Result};
+
+/// Shared application state
+pub struct AppState {
+    pub config: AppConfig,
+    pub db_pool: deadpool_postgres::Pool,
+    pub bunny_client: bunny::BunnyClient,
+    pub http_client: reqwest::Client,
+}
+
+/// Service entry point: loads configuration, prepares the database pool and
+/// Bunny client, builds the router and serves it.
+#[tokio::main]
+async fn main() -> color_eyre::Result<()> {
+    color_eyre::install()?;
+
+    // Initialize tracing
+    tracing_subscriber::registry()
+        .with(fmt::layer())
+        .with(
+            EnvFilter::try_from_default_env()
+                .unwrap_or_else(|_| EnvFilter::new("info,rsky_video=debug")),
+        )
+        .init();
+
+    // Load configuration
+    let config = AppConfig::from_env()?;
+    info!(
+        "Starting Blacksky Video Service on {}:{}",
+        config.host, config.port
+    );
+
+    // Initialize database pool
+    let mut pg_config = PgConfig::new();
+    pg_config.url = Some(config.database_url.clone());
+    let db_pool = pg_config.create_pool(Some(Runtime::Tokio1), NoTls)?;
+
+    // Run migrations
+    db::run_migrations(&db_pool).await?;
+
+    // Initialize Bunny client
+    let bunny_client = bunny::BunnyClient::new(
+        config.bunny_library_id.clone(),
+        config.bunny_api_key.clone(),
+        config.bunny_pull_zone.clone(),
+    );
+
+    // Initialize HTTP client for PDS uploads
+    let http_client = reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(300))
+        .build()?;
+
+    // uploadVideo buffers the whole body through the `Bytes` extractor, which
+    // axum limits to 2 MB unless told otherwise. Raise the limit for that route
+    // to the configured maximum so `max_video_size` is actually reachable.
+    let upload_body_limit = usize::try_from(config.max_video_size).unwrap_or(usize::MAX);
+
+    // Create shared state
+    let state = Arc::new(AppState {
+        config: config.clone(),
+        db_pool,
+        bunny_client,
+        http_client,
+    });
+
+    // Build router
+    let app = Router::new()
+        // XRPC endpoints
+        .route(
+            "/xrpc/app.bsky.video.getUploadLimits",
+            get(xrpc::get_upload_limits),
+        )
+        .route(
+            "/xrpc/app.bsky.video.uploadVideo",
+            post(xrpc::upload_video)
+                .layer(axum::extract::DefaultBodyLimit::max(upload_body_limit)),
+        )
+        .route(
+            "/xrpc/app.bsky.video.getJobStatus",
+            get(xrpc::get_job_status),
+        )
+        // Webhook endpoint for Bunny callbacks
+        .route("/webhook/bunny", post(xrpc::bunny_webhook))
+        // Video proxy endpoints
+        .route("/stream/:did/:cid/playlist.m3u8", get(xrpc::proxy_playlist))
+        .route(
+            "/stream/:did/:cid/thumbnail.jpg",
+            get(xrpc::proxy_thumbnail),
+        )
+        // Health check
+        .route("/health", get(health_check))
+        .route("/_health", get(health_check))
+        // Add middleware
+        .layer(TraceLayer::new_for_http())
+        .layer(
+            CorsLayer::new()
+                .allow_origin(Any)
+                .allow_methods(Any)
+                .allow_headers(Any),
+        )
+        .with_state(state);
+
+    // Start server on the configured host; it was previously hard-coded to
+    // 0.0.0.0 even though the startup log reported `config.host`.
+    let listener = tokio::net::TcpListener::bind((config.host.as_str(), config.port)).await?;
+    let addr: SocketAddr = listener.local_addr()?;
+    info!("Listening on {}", addr);
+
+    axum::serve(listener, app).await?;
+
+    Ok(())
+}
+
+/// Liveness probe: always answers `OK`.
+async fn health_check() -> &'static str {
+    "OK"
+}
diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs
new file mode 100644
index 00000000..3cbadaa5
--- /dev/null
+++ b/rsky-video/src/xrpc/mod.rs
@@ -0,0 +1,435 @@
+//! 
XRPC endpoint handlers for app.bsky.video.* methods
+
+use std::sync::Arc;
+
+use axum::{
+    Json,
+    body::Body,
+    extract::{Path, Query, State},
+    http::{HeaderMap, StatusCode, header},
+    response::{IntoResponse, Response},
+};
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+use serde_json::{Value as JsonValue, json};
+use tracing::{debug, error, info, warn};
+use uuid::Uuid;
+
+use crate::{
+    AppState, auth,
+    bunny::WebhookPayload,
+    db::{self, job_state},
+    error::{Error, Result},
+};
+
+/// Query parameters for getUploadLimits
+#[derive(Debug, Deserialize)]
+pub struct GetUploadLimitsParams {}
+
+/// Response for getUploadLimits
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct GetUploadLimitsResponse {
+    pub can_upload: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub remaining_daily_videos: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub remaining_daily_bytes: Option<i64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub message: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+}
+
+/// GET /xrpc/app.bsky.video.getUploadLimits
+///
+/// Reports how much of the caller's daily quota is left. Remaining values are
+/// never reported below zero.
+pub async fn get_upload_limits(
+    State(state): State<Arc<AppState>>,
+    headers: HeaderMap,
+) -> Result<Json<GetUploadLimitsResponse>> {
+    // Validate service auth
+    let auth_header = headers
+        .get(header::AUTHORIZATION)
+        .and_then(|v| v.to_str().ok());
+
+    let user_did = auth::get_user_did(auth_header, &state.config.service_did)?;
+    debug!("getUploadLimits for user: {}", user_did);
+
+    // Get user's quota
+    let quota = db::get_or_create_quota(&state.db_pool, &user_did).await?;
+
+    // Saturate instead of wrapping if a configured limit exceeds the signed range.
+    let video_limit = i32::try_from(state.config.daily_video_limit).unwrap_or(i32::MAX);
+    let byte_limit = i64::try_from(state.config.daily_byte_limit).unwrap_or(i64::MAX);
+    let remaining_videos = video_limit - quota.daily_videos_used;
+    let remaining_bytes = byte_limit - quota.daily_bytes_used;
+
+    // The video limit takes precedence when both limits are exhausted.
+    let message = if remaining_videos <= 0 {
+        Some("User has exceeded daily upload videos limit")
+    } else if remaining_bytes <= 0 {
+        Some("User has exceeded daily upload bytes limit")
+    } else {
+        None
+    };
+
+    let response = GetUploadLimitsResponse {
+        can_upload: message.is_none(),
+        // Clamp so an over-quota user is never shown a negative remainder.
+        remaining_daily_videos: Some(remaining_videos.max(0)),
+        remaining_daily_bytes: Some(remaining_bytes.max(0)),
+        message: message.map(str::to_string),
+        error: None,
+    };
+
+    Ok(Json(response))
+}
+
+/// Query parameters for uploadVideo
+#[derive(Debug, Deserialize)]
+pub struct UploadVideoParams {
+    pub did: String,
+    pub name: String,
+}
+
+/// Response for uploadVideo
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct UploadVideoResponse {
+    pub job_status: JobStatus,
+}
+
+/// Job status in API responses
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct JobStatus {
+    pub job_id: String,
+    pub did: String,
+    pub state: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub progress: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub blob: Option<JsonValue>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub message: Option<String>,
+}
+
+/// POST /xrpc/app.bsky.video.uploadVideo
+pub async fn upload_video(
+    State(state): State<Arc<AppState>>,
+    headers: HeaderMap,
+    Query(params): Query<UploadVideoParams>,
+    body: Bytes,
+) -> Result<Json<UploadVideoResponse>> {
+    // Validate service auth
+    let auth_header = headers
+        .get(header::AUTHORIZATION)
+        .and_then(|v| v.to_str().ok());
+
+    // The token should be for com.atproto.repo.uploadBlob
+    let token = auth::extract_auth_header(auth_header)?;
+    let claims = auth::validate_service_auth(&token, &state.config.service_did, None)?;
+
+    // Verify the DID matches
+    if claims.sub != params.did {
+        return 
Err(Error::Forbidden(
+            "Token subject does not match upload DID".to_string(),
+        ));
+    }
+
+    let user_did = &params.did;
+    let file_size = body.len() as i64;
+
+    info!(
+        "uploadVideo: did={}, name={}, size={}",
+        user_did, params.name, file_size
+    );
+
+    // Check file size
+    if file_size > state.config.max_video_size as i64 {
+        return Err(Error::VideoTooLarge(format!(
+            "file size ({} bytes) is larger than the maximum allowed size ({} bytes)",
+            file_size, state.config.max_video_size
+        )));
+    }
+
+    // Check quota
+    let quota = db::get_or_create_quota(&state.db_pool, user_did).await?;
+    let remaining_videos = state.config.daily_video_limit as i32 - quota.daily_videos_used;
+    let remaining_bytes = state.config.daily_byte_limit as i64 - quota.daily_bytes_used;
+
+    if remaining_videos <= 0 {
+        return Err(Error::UploadLimitExceeded(
+            "User has exceeded daily upload videos limit".to_string(),
+        ));
+    }
+    if remaining_bytes < file_size {
+        return Err(Error::UploadLimitExceeded(
+            "User has exceeded daily upload bytes limit".to_string(),
+        ));
+    }
+
+    // Create job in database
+    let job = db::create_job(
+        &state.db_pool,
+        user_did,
+        Some(&params.name),
+        Some(file_size),
+    )
+    .await?;
+
+    let job_id = job.job_id;
+    info!("Created job: {}", job_id);
+
+    // Create video in Bunny Stream
+    let title = format!("{}_{}", user_did, params.name);
+    let bunny_video = match state.bunny_client.create_video(&title).await {
+        Ok(v) => v,
+        Err(e) => {
+            error!("Failed to create Bunny video: {}", e);
+            mark_job_failed(&state, job_id, &e).await;
+            return Err(e);
+        }
+    };
+
+    let bunny_video_id = bunny_video.guid.clone();
+    db::set_bunny_video_id(&state.db_pool, job_id, &bunny_video_id).await?;
+
+    // Upload video to Bunny
+    if let Err(e) = state.bunny_client.upload_video(&bunny_video_id, body).await {
+        error!("Failed to upload to Bunny: {}", e);
+        mark_job_failed(&state, job_id, &e).await;
+        return Err(e);
+    }
+
+    // Update job state to processing
+    db::update_job_state(&state.db_pool, job_id, job_state::PROCESSING, 0).await?;
+
+    // Increment quota
+    db::increment_quota(&state.db_pool, user_did, file_size).await?;
+
+    info!(
+        "Video uploaded to Bunny: job={}, bunny_id={}",
+        job_id, bunny_video_id
+    );
+
+    Ok(Json(UploadVideoResponse {
+        job_status: JobStatus {
+            job_id: job_id.to_string(),
+            did: user_did.to_string(),
+            state: job_state::PROCESSING.to_string(),
+            progress: Some(0),
+            blob: None,
+            error: None,
+            message: Some("Video is being processed".to_string()),
+        },
+    }))
+}
+
+/// Record `cause` on the job as its failure reason.
+///
+/// A failure of this bookkeeping write is only logged, so the caller can still
+/// return the original Bunny error to the client instead of a database error.
+async fn mark_job_failed(state: &AppState, job_id: Uuid, cause: &Error) {
+    if let Err(db_err) = db::fail_job(&state.db_pool, job_id, &cause.to_string()).await {
+        error!("Failed to mark job {} as failed: {}", job_id, db_err);
+    }
+}
+
+/// Query parameters for getJobStatus
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct GetJobStatusParams {
+    pub job_id: String,
+}
+
+/// Response for getJobStatus
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct GetJobStatusResponse {
+    pub job_status: JobStatus,
+}
+
+/// GET /xrpc/app.bsky.video.getJobStatus
+pub async fn get_job_status(
+    State(state): State<Arc<AppState>>,
+    Query(params): Query<GetJobStatusParams>,
+) -> Result<Json<GetJobStatusResponse>> {
+    let job_id = Uuid::parse_str(&params.job_id)
+        .map_err(|_| Error::BadRequest("Invalid job ID format".to_string()))?;
+
+    let job = db::get_job(&state.db_pool, job_id)
+        .await?
+        .ok_or_else(|| Error::NotFound("Job not found".to_string()))?;
+
+    // If job is still processing, check Bunny status
+    let (state_str, progress) = if job.state == job_state::PROCESSING {
+        if let Some(ref bunny_id) = job.bunny_video_id {
+            match state.bunny_client.get_video(bunny_id).await {
+                Ok(video_info) => {
+                    if video_info.is_encoding_complete() {
+                        (job_state::PROCESSING.to_string(), 99)
+                    } else if video_info.is_encoding_failed() {
+                        (job_state::FAILED.to_string(), job.progress)
+                    } else {
+                        (job.state.clone(), video_info.encoding_progress())
+                    }
+                }
+                Err(e) => {
+                    warn!("Failed to get Bunny video status: {}", e);
+                    (job.state.clone(), job.progress)
+                }
+            }
+        } else {
+            (job.state.clone(), job.progress)
+        }
+    } else {
+        (job.state.clone(), job.progress)
+    };
+
+    Ok(Json(GetJobStatusResponse {
+        job_status: JobStatus {
+            job_id: job.job_id.to_string(),
+            did: job.did,
+            state: state_str,
+            progress: Some(progress),
+            blob: job.blob_ref,
+            error: job.error,
+            message: job.message,
+        },
+    }))
+}
+
+/// POST /webhook/bunny - Handle Bunny Stream webhook callbacks
+///
+/// Status 3/4 completes the job, status 5 fails it, and statuses 0-2 update
+/// progress. Any other status, or a progress status for a job that is already
+/// completed or failed, is acknowledged without changing the job.
+pub async fn bunny_webhook(
+    State(state): State<Arc<AppState>>,
+    Json(payload): Json<WebhookPayload>,
+) -> Result<StatusCode> {
+    info!(
+        "Bunny webhook: video={}, status={} ({})",
+        payload.video_guid,
+        payload.status,
+        payload.status_name()
+    );
+
+    // Find the job by bunny video ID
+    let job = match db::get_job_by_bunny_id(&state.db_pool, &payload.video_guid).await? {
+        Some(j) => j,
+        None => {
+            warn!("Webhook for unknown video: {}", payload.video_guid);
+            return Ok(StatusCode::OK);
+        }
+    };
+
+    if payload.is_finished() || payload.is_resolution_finished() {
+        // Video encoding is complete
+        info!("Video encoding complete: job={}", job.job_id);
+
+        // Get video info from Bunny
+        let video_info = state.bunny_client.get_video(&payload.video_guid).await?;
+
+        // Create a blob ref that points to the video
+        // In a full implementation, we'd upload to the user's PDS here
+        // For MVP, we create a synthetic blob ref
+        let blob_ref = json!({
+            "$type": "blob",
+            "ref": {
+                "$link": payload.video_guid
+            },
+            "mimeType": "video/mp4",
+            "size": video_info.storage_size
+        });
+
+        // Save the mapping for URL proxy
+        // The CID is the bunny video ID for now
+        db::save_video_mapping(
+            &state.db_pool,
+            &job.did,
+            &payload.video_guid,
+            &payload.video_guid,
+        )
+        .await?;
+
+        // Mark job as complete
+        db::complete_job(&state.db_pool, job.job_id, blob_ref).await?;
+
+        info!("Job completed: {}", job.job_id);
+    } else if payload.is_failed() {
+        // Video encoding failed
+        error!("Video encoding failed: job={}", job.job_id);
+        db::fail_job(&state.db_pool, job.job_id, "Video encoding failed").await?;
+    } else {
+        // Only the pre-encoding statuses carry progress information.
+        let progress = match payload.status {
+            0 => Some(0),  // Queued
+            1 => Some(10), // Processing
+            2 => Some(50), // Encoding
+            _ => None,
+        };
+        // A late or auxiliary notification (e.g. captions generated) must not
+        // move a completed or failed job back to PROCESSING.
+        let already_settled =
+            job.state == job_state::COMPLETED || job.state == job_state::FAILED;
+
+        match progress {
+            Some(progress) if !already_settled => {
+                db::update_job_state(&state.db_pool, job.job_id, job_state::PROCESSING, progress)
+                    .await?;
+            }
+            _ => debug!(
+                "Ignoring webhook status {} for job {} in state {}",
+                payload.status, job.job_id, job.state
+            ),
+        }
+    }
+
+    Ok(StatusCode::OK)
+}
+
+/// Path parameters for video proxy
+#[derive(Debug, Deserialize)]
+pub struct VideoProxyPath {
+    pub did: String,
+    pub cid: String,
+}
+
+/// GET /stream/:did/:cid/playlist.m3u8 - Proxy HLS playlist
+pub async fn proxy_playlist(
+    State(state): State<Arc<AppState>>,
+    Path(path): Path<VideoProxyPath>,
+) -> Result<Response> {
+    let did = urlencoding::decode(&path.did)
+        .map_err(|_| Error::BadRequest("Invalid DID encoding".to_string()))?;
+    let cid = urlencoding::decode(&path.cid)
+        .map_err(|_| 
Error::BadRequest("Invalid CID encoding".to_string()))?;
+
+    debug!("Proxy playlist: did={}, cid={}", did, cid);
+
+    // Look up the bunny video ID
+    let bunny_video_id = db::get_bunny_video_id(&state.db_pool, &did, &cid)
+        .await?
+        .ok_or_else(|| Error::NotFound("Video not found".to_string()))?;
+
+    // Redirect to Bunny CDN
+    let bunny_url = state.bunny_client.get_playlist_url(&bunny_video_id);
+
+    Ok(Response::builder()
+        .status(StatusCode::TEMPORARY_REDIRECT)
+        .header(header::LOCATION, bunny_url)
+        .header(header::CACHE_CONTROL, "public, max-age=3600")
+        .body(Body::empty())
+        .unwrap())
+}
+
+/// GET /stream/:did/:cid/thumbnail.jpg - Proxy thumbnail
+///
+/// Answers with a temporary redirect to the Bunny thumbnail URL for the mapped
+/// video, or `NotFound` when no mapping exists for the DID/CID pair.
+pub async fn proxy_thumbnail(
+    State(state): State<Arc<AppState>>,
+    Path(path): Path<VideoProxyPath>,
+) -> Result<Response> {
+    let did = urlencoding::decode(&path.did)
+        .map_err(|_| Error::BadRequest("Invalid DID encoding".to_string()))?;
+    let cid = urlencoding::decode(&path.cid)
+        .map_err(|_| Error::BadRequest("Invalid CID encoding".to_string()))?;
+
+    debug!("Proxy thumbnail: did={}, cid={}", did, cid);
+
+    // Look up the bunny video ID
+    let bunny_video_id = db::get_bunny_video_id(&state.db_pool, &did, &cid)
+        .await?
+ .ok_or_else(|| Error::NotFound("Video not found".to_string()))?; + + // Redirect to Bunny CDN + let bunny_url = state.bunny_client.get_thumbnail_url(&bunny_video_id); + + Ok(Response::builder() + .status(StatusCode::TEMPORARY_REDIRECT) + .header(header::LOCATION, bunny_url) + .header(header::CACHE_CONTROL, "public, max-age=86400") + .body(Body::empty()) + .unwrap()) +} From 793b140d0ed55f6ffa145e7f5bc44e248bac4335 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 14:42:14 -0500 Subject: [PATCH 09/42] Fix rsky-video: add db module, fix pattern matching --- .gitignore | 5 +- rsky-video/src/db/mod.rs | 402 +++++++++++++++++++++++++++++++++++++ rsky-video/src/xrpc/mod.rs | 2 +- 3 files changed, 406 insertions(+), 3 deletions(-) create mode 100644 rsky-video/src/db/mod.rs diff --git a/.gitignore b/.gitignore index 031e0e41..edc7eee8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ NOTE.md Rocket.toml *.pem **/data/ -db/ +/db/ *.db *.db-shm *.db-wal @@ -168,4 +168,5 @@ rsky-wintermute/*.md rsky-wintermute/*.json !rsky-wintermute/README.md CLAUDE.md -.claude \ No newline at end of file +.claude +docs \ No newline at end of file diff --git a/rsky-video/src/db/mod.rs b/rsky-video/src/db/mod.rs new file mode 100644 index 00000000..d9926076 --- /dev/null +++ b/rsky-video/src/db/mod.rs @@ -0,0 +1,402 @@ +//! 
Database operations for video jobs and quotas
+
+use chrono::{DateTime, Utc};
+use deadpool_postgres::Pool;
+use serde::{Deserialize, Serialize};
+use serde_json::Value as JsonValue;
+use tracing::info;
+use uuid::Uuid;
+
+use crate::error::{Error, Result};
+
+/// Video job record
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VideoJob {
+    pub id: i64,
+    pub job_id: Uuid,
+    pub did: String,
+    pub bunny_video_id: Option<String>,
+    pub state: String,
+    pub progress: i32,
+    pub blob_ref: Option<JsonValue>,
+    pub error: Option<String>,
+    pub message: Option<String>,
+    pub original_filename: Option<String>,
+    pub file_size: Option<i64>,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+}
+
+/// Job state constants
+pub mod job_state {
+    pub const CREATED: &str = "JOB_STATE_CREATED";
+    pub const UPLOADING: &str = "JOB_STATE_UPLOADING";
+    pub const PROCESSING: &str = "JOB_STATE_PROCESSING";
+    pub const COMPLETED: &str = "JOB_STATE_COMPLETED";
+    pub const FAILED: &str = "JOB_STATE_FAILED";
+}
+
+/// Upload quota record
+#[derive(Debug, Clone)]
+pub struct UploadQuota {
+    pub did: String,
+    pub daily_videos_used: i32,
+    pub daily_bytes_used: i64,
+    pub quota_reset_at: DateTime<Utc>,
+}
+
+/// Run database migrations
+///
+/// Every statement uses `IF NOT EXISTS`, so this can run on each startup.
+pub async fn run_migrations(pool: &Pool) -> Result<()> {
+    let client = pool.get().await?;
+
+    // Create video_jobs table
+    client
+        .execute(
+            r#"
+            CREATE TABLE IF NOT EXISTS video_jobs (
+                id BIGSERIAL PRIMARY KEY,
+                job_id UUID NOT NULL UNIQUE,
+                did TEXT NOT NULL,
+                bunny_video_id TEXT,
+                state TEXT NOT NULL DEFAULT 'JOB_STATE_CREATED',
+                progress INTEGER DEFAULT 0,
+                blob_ref JSONB,
+                error TEXT,
+                message TEXT,
+                original_filename TEXT,
+                file_size BIGINT,
+                created_at TIMESTAMPTZ DEFAULT NOW(),
+                updated_at TIMESTAMPTZ DEFAULT NOW()
+            )
+            "#,
+            &[],
+        )
+        .await?;
+
+    // Create index on job_id
+    client
+        .execute(
+            "CREATE INDEX IF NOT EXISTS idx_video_jobs_job_id ON video_jobs (job_id)",
+            &[],
+        )
+        .await?;
+
+    // Create index on bunny_video_id for webhook lookups
+    client
+        .execute(
+            "CREATE INDEX IF NOT EXISTS idx_video_jobs_bunny_video_id ON video_jobs (bunny_video_id)",
+            &[],
+        )
+        .await?;
+
+    // Create index on did for quota lookups
+    client
+        .execute(
+            "CREATE INDEX IF NOT EXISTS idx_video_jobs_did ON video_jobs (did)",
+            &[],
+        )
+        .await?;
+
+    // Create upload_quotas table
+    client
+        .execute(
+            r#"
+            CREATE TABLE IF NOT EXISTS upload_quotas (
+                did TEXT PRIMARY KEY,
+                daily_videos_used INTEGER DEFAULT 0,
+                daily_bytes_used BIGINT DEFAULT 0,
+                quota_reset_at TIMESTAMPTZ DEFAULT NOW(),
+                created_at TIMESTAMPTZ DEFAULT NOW()
+            )
+            "#,
+            &[],
+        )
+        .await?;
+
+    // Create video_mappings table for did/cid -> bunny_video_id mapping
+    client
+        .execute(
+            r#"
+            CREATE TABLE IF NOT EXISTS video_mappings (
+                id BIGSERIAL PRIMARY KEY,
+                did TEXT NOT NULL,
+                cid TEXT NOT NULL,
+                bunny_video_id TEXT NOT NULL,
+                created_at TIMESTAMPTZ DEFAULT NOW(),
+                UNIQUE(did, cid)
+            )
+            "#,
+            &[],
+        )
+        .await?;
+
+    // Create index for video mapping lookups
+    client
+        .execute(
+            "CREATE INDEX IF NOT EXISTS idx_video_mappings_did_cid ON video_mappings (did, cid)",
+            &[],
+        )
+        .await?;
+
+    info!("Database migrations completed");
+    Ok(())
+}
+
+/// Create a new video job
+///
+/// Generates a fresh random `job_id`; the remaining columns take their table defaults.
+pub async fn create_job(
+    pool: &Pool,
+    did: &str,
+    filename: Option<&str>,
+    file_size: Option<i64>,
+) -> Result<VideoJob> {
+    let client = pool.get().await?;
+    let job_id = Uuid::new_v4();
+
+    let row = client
+        .query_one(
+            r#"
+            INSERT INTO video_jobs (job_id, did, original_filename, file_size)
+            VALUES ($1, $2, $3, $4)
+            RETURNING id, job_id, did, bunny_video_id, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at
+            "#,
+            &[&job_id, &did, &filename, &file_size],
+        )
+        .await?;
+
+    Ok(row_to_job(&row))
+}
+
+/// Get a job by job_id
+pub async fn get_job(pool: &Pool, job_id: Uuid) -> Result<Option<VideoJob>> {
+    let client = pool.get().await?;
+
+    let row = client
+        .query_opt(
+            r#"
+            SELECT id, job_id, did, bunny_video_id, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at
+            FROM video_jobs
+            WHERE job_id = $1
+            "#,
+            &[&job_id],
+        )
+        .await?;
+
+    Ok(row.map(|r| row_to_job(&r)))
+}
+
+/// Get a job by bunny_video_id (for webhook handling)
+pub async fn get_job_by_bunny_id(pool: &Pool, bunny_video_id: &str) -> Result<Option<VideoJob>> {
+    let client = pool.get().await?;
+
+    let row = client
+        .query_opt(
+            r#"
+            SELECT id, job_id, did, bunny_video_id, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at
+            FROM video_jobs
+            WHERE bunny_video_id = $1
+            "#,
+            &[&bunny_video_id],
+        )
+        .await?;
+
+    Ok(row.map(|r| row_to_job(&r)))
+}
+
+/// Update job with bunny video ID
+///
+/// Also moves the job to `JOB_STATE_UPLOADING`.
+pub async fn set_bunny_video_id(pool: &Pool, job_id: Uuid, bunny_video_id: &str) -> Result<()> {
+    let client = pool.get().await?;
+
+    client
+        .execute(
+            r#"
+            UPDATE video_jobs
+            SET bunny_video_id = $2, state = 'JOB_STATE_UPLOADING', updated_at = NOW()
+            WHERE job_id = $1
+            "#,
+            &[&job_id, &bunny_video_id],
+        )
+        .await?;
+
+    Ok(())
+}
+
+/// Update job state
+pub async fn update_job_state(pool: &Pool, job_id: Uuid, state: &str, progress: i32) -> Result<()> {
+    let client = pool.get().await?;
+
+    client
+        .execute(
+            r#"
+            UPDATE video_jobs
+            SET state = $2, progress = $3, updated_at = NOW()
+            WHERE job_id = $1
+            "#,
+            &[&job_id, &state, &progress],
+        )
+        .await?;
+
+    Ok(())
+}
+
+/// Mark job as completed with blob ref
+pub async fn complete_job(pool: &Pool, job_id: Uuid, blob_ref: JsonValue) -> Result<()> {
+    let client = pool.get().await?;
+
+    client
+        .execute(
+            r#"
+            UPDATE video_jobs
+            SET state = 'JOB_STATE_COMPLETED', progress = 100, blob_ref = $2, updated_at = NOW()
+            WHERE job_id = $1
+            "#,
+            &[&job_id, &blob_ref],
+        )
+        .await?;
+
+    Ok(())
+}
+
+/// Mark job as failed
+pub async fn fail_job(pool: &Pool, job_id: Uuid, error: &str) -> Result<()> {
+    let client = pool.get().await?;
+
+    client
+        .execute(
+            r#"
+            UPDATE video_jobs
+            SET state = 'JOB_STATE_FAILED', error = $2, updated_at = NOW()
+            WHERE job_id = $1
+            "#,
+            &[&job_id, &error],
+        )
+        .await?;
+
+    Ok(())
+}
+
+/// Get or create upload quota for a user
+///
+/// An existing quota whose `quota_reset_at` falls on an earlier calendar date
+/// than now is zeroed before being returned.
+pub async fn get_or_create_quota(pool: &Pool, did: &str) -> Result<UploadQuota> {
+    let client = pool.get().await?;
+    let now = Utc::now();
+
+    // Try to get existing quota
+    let row = client
+        .query_opt(
+            "SELECT did, daily_videos_used, daily_bytes_used, quota_reset_at FROM upload_quotas WHERE did = $1",
+            &[&did],
+        )
+        .await?;
+
+    if let Some(row) = row {
+        let quota_reset_at: DateTime<Utc> = row.get(3);
+
+        // Check if quota should be reset (new day)
+        if now.date_naive() > quota_reset_at.date_naive() {
+            // Reset quota
+            client
+                .execute(
+                    "UPDATE upload_quotas SET daily_videos_used = 0, daily_bytes_used = 0, quota_reset_at = $2 WHERE did = $1",
+                    &[&did, &now],
+                )
+                .await?;
+
+            return Ok(UploadQuota {
+                did: did.to_string(),
+                daily_videos_used: 0,
+                daily_bytes_used: 0,
+                quota_reset_at: now,
+            });
+        }
+
+        return Ok(UploadQuota {
+            did: row.get(0),
+            daily_videos_used: row.get(1),
+            daily_bytes_used: row.get(2),
+            quota_reset_at,
+        });
+    }
+
+    // Create new quota record
+    client
+        .execute(
+            "INSERT INTO upload_quotas (did, quota_reset_at) VALUES ($1, $2) ON CONFLICT (did) DO NOTHING",
+            &[&did, &now],
+        )
+        .await?;
+
+    Ok(UploadQuota {
+        did: did.to_string(),
+        daily_videos_used: 0,
+        daily_bytes_used: 0,
+        quota_reset_at: now,
+    })
+}
+
+/// Increment quota usage
+///
+/// Adds one video and `bytes` bytes to the user's counters.
+pub async fn increment_quota(pool: &Pool, did: &str, bytes: i64) -> Result<()> {
+    let client = pool.get().await?;
+
+    client
+        .execute(
+            "UPDATE upload_quotas SET daily_videos_used = daily_videos_used + 1, daily_bytes_used = daily_bytes_used + $2 WHERE did = $1",
+            &[&did, &bytes],
+        )
+        .await?;
+
+    Ok(())
+}
+
+/// Save video mapping (did/cid -> bunny_video_id)
+pub async fn save_video_mapping(
+    pool: &Pool,
+    did: &str,
+    cid: &str,
+    bunny_video_id: &str,
+) -> Result<()> {
+    let client = pool.get().await?;
+
+    client
+        .execute(
+            r#"
+            INSERT INTO 
video_mappings (did, cid, bunny_video_id)
+            VALUES ($1, $2, $3)
+            ON CONFLICT (did, cid) DO UPDATE SET bunny_video_id = $3
+            "#,
+            &[&did, &cid, &bunny_video_id],
+        )
+        .await?;
+
+    Ok(())
+}
+
+/// Get bunny video ID from did/cid mapping
+pub async fn get_bunny_video_id(pool: &Pool, did: &str, cid: &str) -> Result<Option<String>> {
+    let client = pool.get().await?;
+
+    let row = client
+        .query_opt(
+            "SELECT bunny_video_id FROM video_mappings WHERE did = $1 AND cid = $2",
+            &[&did, &cid],
+        )
+        .await?;
+
+    Ok(row.map(|r| r.get(0)))
+}
+
+/// Convert a row into a [`VideoJob`].
+///
+/// Columns are read by position, so the query must select them in the order
+/// used by the `video_jobs` queries in this module (id, job_id, did, ...).
+fn row_to_job(row: &tokio_postgres::Row) -> VideoJob {
+    VideoJob {
+        id: row.get(0),
+        job_id: row.get(1),
+        did: row.get(2),
+        bunny_video_id: row.get(3),
+        state: row.get(4),
+        progress: row.get(5),
+        blob_ref: row.get(6),
+        error: row.get(7),
+        message: row.get(8),
+        original_filename: row.get(9),
+        file_size: row.get(10),
+        created_at: row.get(11),
+        updated_at: row.get(12),
+    }
+}
diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs
index 3cbadaa5..1dbf965b 100644
--- a/rsky-video/src/xrpc/mod.rs
+++ b/rsky-video/src/xrpc/mod.rs
@@ -263,7 +263,7 @@ pub async fn get_job_status(
 
     // If job is still processing, check Bunny status
     let (state_str, progress) = if job.state == job_state::PROCESSING {
-        if let Some(ref bunny_id) = job.bunny_video_id {
+        if let Some(bunny_id) = &job.bunny_video_id {
             match state.bunny_client.get_video(bunny_id).await {
                 Ok(video_info) => {
                     if video_info.is_encoding_complete() {
From 58876ac211f46acdf8440d53493fef4bb4270ef7 Mon Sep 17 00:00:00 2001
From: Rudy Fraser
Date: Wed, 21 Jan 2026 14:44:18 -0500
Subject: [PATCH 10/42] Fix TLS provider: add rustls aws_lc_rs initialization

---
 rsky-video/Cargo.toml  | 3 ++-
 rsky-video/src/main.rs | 6 +++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/rsky-video/Cargo.toml b/rsky-video/Cargo.toml
index be4a0310..23843c9e 100644
--- a/rsky-video/Cargo.toml
+++ b/rsky-video/Cargo.toml
@@ -30,9 +30,10 @@ serde_json = { workspace = true }
tokio-postgres = { version = "0.7", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } deadpool-postgres = "0.13" -# Auth / crypto +# Auth / crypto / TLS jsonwebtoken = "9" base64 = "0.22" +rustls = { version = "0.23", default-features = false, features = ["aws_lc_rs"] } # Utilities chrono = { version = "0.4", default-features = false, features = ["serde", "clock"] } diff --git a/rsky-video/src/main.rs b/rsky-video/src/main.rs index 3aa0b3a1..97bc3f4c 100644 --- a/rsky-video/src/main.rs +++ b/rsky-video/src/main.rs @@ -14,7 +14,8 @@ use deadpool_postgres::{Config as PgConfig, Runtime}; use tokio_postgres::NoTls; use tower_http::cors::{Any, CorsLayer}; use tower_http::trace::TraceLayer; -use tracing::{Level, info}; +use rustls::crypto::aws_lc_rs::default_provider; +use tracing::info; use tracing_subscriber::{EnvFilter, fmt, prelude::*}; mod auth; @@ -39,6 +40,9 @@ pub struct AppState { async fn main() -> color_eyre::Result<()> { color_eyre::install()?; + // Initialize TLS crypto provider + default_provider().install_default().unwrap(); + // Initialize tracing tracing_subscriber::registry() .with(fmt::layer()) From 1b775442571b79c8d68bc41b2b601f5b614dcdd4 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 17:41:01 -0500 Subject: [PATCH 11/42] Add Bluesky CDN fallback for videos not in our database --- rsky-video/src/xrpc/mod.rs | 42 +++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 1dbf965b..3a10c970 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -390,17 +390,22 @@ pub async fn proxy_playlist( debug!("Proxy playlist: did={}, cid={}", did, cid); - // Look up the bunny video ID - let bunny_video_id = db::get_bunny_video_id(&state.db_pool, &did, &cid) - .await? 
- .ok_or_else(|| Error::NotFound("Video not found".to_string()))?; - - // Redirect to Bunny CDN - let bunny_url = state.bunny_client.get_playlist_url(&bunny_video_id); + // Look up the bunny video ID in our database + let redirect_url = match db::get_bunny_video_id(&state.db_pool, &did, &cid).await? { + Some(bunny_video_id) => { + // Video is in our system - redirect to Bunny CDN + state.bunny_client.get_playlist_url(&bunny_video_id) + } + None => { + // Video not in our system - fallback to Bluesky's video CDN + debug!("Video not in our DB, falling back to Bluesky CDN: did={}, cid={}", did, cid); + format!("https://video.bsky.app/watch/{}/{}/playlist.m3u8", did, cid) + } + }; Ok(Response::builder() .status(StatusCode::TEMPORARY_REDIRECT) - .header(header::LOCATION, bunny_url) + .header(header::LOCATION, redirect_url) .header(header::CACHE_CONTROL, "public, max-age=3600") .body(Body::empty()) .unwrap()) @@ -418,17 +423,22 @@ pub async fn proxy_thumbnail( debug!("Proxy thumbnail: did={}, cid={}", did, cid); - // Look up the bunny video ID - let bunny_video_id = db::get_bunny_video_id(&state.db_pool, &did, &cid) - .await? - .ok_or_else(|| Error::NotFound("Video not found".to_string()))?; - - // Redirect to Bunny CDN - let bunny_url = state.bunny_client.get_thumbnail_url(&bunny_video_id); + // Look up the bunny video ID in our database + let redirect_url = match db::get_bunny_video_id(&state.db_pool, &did, &cid).await? 
{ + Some(bunny_video_id) => { + // Video is in our system - redirect to Bunny CDN + state.bunny_client.get_thumbnail_url(&bunny_video_id) + } + None => { + // Video not in our system - fallback to Bluesky's video CDN + debug!("Video not in our DB, falling back to Bluesky CDN: did={}, cid={}", did, cid); + format!("https://video.bsky.app/watch/{}/{}/thumbnail.jpg", did, cid) + } + }; Ok(Response::builder() .status(StatusCode::TEMPORARY_REDIRECT) - .header(header::LOCATION, bunny_url) + .header(header::LOCATION, redirect_url) .header(header::CACHE_CONTROL, "public, max-age=86400") .body(Body::empty()) .unwrap()) From 08fe19706b98e69df9e755c5a8369b23f32352d6 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 17:51:20 -0500 Subject: [PATCH 12/42] Fix JWT auth: use iss as user DID when sub is absent --- rsky-video/src/auth/mod.rs | 21 +++++++++++++++------ rsky-video/src/xrpc/mod.rs | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/rsky-video/src/auth/mod.rs b/rsky-video/src/auth/mod.rs index 9bb3790e..7d17bafe 100644 --- a/rsky-video/src/auth/mod.rs +++ b/rsky-video/src/auth/mod.rs @@ -13,18 +13,27 @@ use crate::error::{Error, Result}; /// Decoded service auth token claims #[derive(Debug, Clone, Deserialize, Serialize)] pub struct ServiceAuthClaims { - /// Issuer (the PDS DID) + /// Issuer (the user's DID when PDS signs on user's behalf) pub iss: String, /// Audience (should be the video service DID) pub aud: String, - /// Subject (the user's DID) - pub sub: String, + /// Subject (the user's DID) - optional, may use iss instead + pub sub: Option, /// Lexicon method being authorized pub lxm: Option, /// Expiration time (Unix timestamp) pub exp: i64, /// Issued at time (Unix timestamp) pub iat: Option, + /// JWT ID + pub jti: Option, +} + +impl ServiceAuthClaims { + /// Get the user DID - uses sub if present, otherwise iss + pub fn user_did(&self) -> &str { + self.sub.as_deref().unwrap_or(&self.iss) + } } /// Extract and validate the 
Authorization header @@ -71,8 +80,8 @@ pub fn validate_service_auth( .map_err(|e| Error::Unauthorized(format!("Failed to parse JWT claims: {}", e)))?; debug!( - "Service auth: iss={}, sub={}, aud={}, lxm={:?}", - claims.iss, claims.sub, claims.aud, claims.lxm + "Service auth: iss={}, sub={:?}, aud={}, lxm={:?}, user_did={}", + claims.iss, claims.sub, claims.aud, claims.lxm, claims.user_did() ); // Check expiration @@ -105,7 +114,7 @@ pub fn validate_service_auth( pub fn get_user_did(auth_header: Option<&str>, service_did: &str) -> Result { let token = extract_auth_header(auth_header)?; let claims = validate_service_auth(&token, service_did, None)?; - Ok(claims.sub) + Ok(claims.user_did().to_string()) } #[cfg(test)] diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 3a10c970..816e2509 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -140,7 +140,7 @@ pub async fn upload_video( let claims = auth::validate_service_auth(&token, &state.config.service_did, None)?; // Verify the DID matches - if claims.sub != params.did { + if claims.user_did() != params.did { return Err(Error::Forbidden( "Token subject does not match upload DID".to_string(), )); From 9ade15f1c02cbd49fd1796f63ce0572a42449ad4 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 18:13:22 -0500 Subject: [PATCH 13/42] Add 100MB body limit for video uploads --- rsky-video/src/main.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rsky-video/src/main.rs b/rsky-video/src/main.rs index 97bc3f4c..47f7994e 100644 --- a/rsky-video/src/main.rs +++ b/rsky-video/src/main.rs @@ -8,6 +8,7 @@ use std::sync::Arc; use axum::{ Router, + extract::DefaultBodyLimit, routing::{get, post}, }; use deadpool_postgres::{Config as PgConfig, Runtime}; @@ -94,7 +95,10 @@ async fn main() -> color_eyre::Result<()> { "/xrpc/app.bsky.video.getUploadLimits", get(xrpc::get_upload_limits), ) - .route("/xrpc/app.bsky.video.uploadVideo", 
post(xrpc::upload_video)) + .route( + "/xrpc/app.bsky.video.uploadVideo", + post(xrpc::upload_video), + ) .route( "/xrpc/app.bsky.video.getJobStatus", get(xrpc::get_job_status), @@ -111,6 +115,7 @@ async fn main() -> color_eyre::Result<()> { .route("/health", get(health_check)) .route("/_health", get(health_check)) // Add middleware + .layer(DefaultBodyLimit::max(100 * 1024 * 1024)) // 100MB for video uploads .layer(TraceLayer::new_for_http()) .layer( CorsLayer::new() From ea1e11d2f2e33fe60f6fe71330d7ea1461c1ddea Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 18:19:49 -0500 Subject: [PATCH 14/42] Return flat JobStatus from uploadVideo endpoint --- rsky-video/src/xrpc/mod.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 816e2509..b62865a6 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -129,7 +129,7 @@ pub async fn upload_video( headers: HeaderMap, Query(params): Query, body: Bytes, -) -> Result> { +) -> Result> { // Validate service auth let auth_header = headers .get(header::AUTHORIZATION) @@ -222,16 +222,15 @@ pub async fn upload_video( job_id, bunny_video_id ); - Ok(Json(UploadVideoResponse { - job_status: JobStatus { - job_id: job_id.to_string(), - did: user_did.to_string(), - state: job_state::PROCESSING.to_string(), - progress: Some(0), - blob: None, - error: None, - message: Some("Video is being processed".to_string()), - }, + // Return flat JobStatus (not wrapped) - client expects this format + Ok(Json(JobStatus { + job_id: job_id.to_string(), + did: user_did.to_string(), + state: job_state::PROCESSING.to_string(), + progress: Some(0), + blob: None, + error: None, + message: Some("Video is being processed".to_string()), })) } From f3f0f201232db2be17edf85e01b1df84fb9fc722 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 18:23:57 -0500 Subject: [PATCH 15/42] Return flat JobStatus from 
getJobStatus endpoint --- rsky-video/src/xrpc/mod.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index b62865a6..e63f17c3 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -252,7 +252,7 @@ pub struct GetJobStatusResponse { pub async fn get_job_status( State(state): State>, Query(params): Query, -) -> Result<Json<GetJobStatusResponse>> { +) -> Result<Json<JobStatus>> { let job_id = Uuid::parse_str(&params.job_id) .map_err(|_| Error::BadRequest("Invalid job ID format".to_string()))?; @@ -285,16 +285,15 @@ pub async fn get_job_status( (job.state.clone(), job.progress) }; - Ok(Json(GetJobStatusResponse { - job_status: JobStatus { - job_id: job.job_id.to_string(), - did: job.did, - state: state_str, - progress: Some(progress), - blob: job.blob_ref, - error: job.error, - message: job.message, - }, + // Return flat JobStatus (not wrapped) - client expects this format + Ok(Json(JobStatus { + job_id: job.job_id.to_string(), + did: job.did, + state: state_str, + progress: Some(progress), + blob: job.blob_ref, + error: job.error, + message: job.message, })) } From 8d33a3c60c79ce31623787758ac9afbdc19e3da1 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 18:37:10 -0500 Subject: [PATCH 16/42] Keep getJobStatus wrapped (SDK expects it), uploadVideo flat (XHR expects it) --- rsky-video/src/xrpc/mod.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index e63f17c3..71b42af1 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -252,7 +252,7 @@ pub struct GetJobStatusResponse { pub async fn get_job_status( State(state): State>, Query(params): Query, -) -> Result<Json<JobStatus>> { +) -> Result<Json<GetJobStatusResponse>> { let job_id = Uuid::parse_str(&params.job_id) .map_err(|_| Error::BadRequest("Invalid job ID format".to_string()))?; @@ -285,15 +285,17 @@ pub async fn get_job_status( (job.state.clone(),
job.progress) }; - // Return flat JobStatus (not wrapped) - client expects this format - Ok(Json(JobStatus { - job_id: job.job_id.to_string(), - did: job.did, - state: state_str, - progress: Some(progress), - blob: job.blob_ref, - error: job.error, - message: job.message, + // Return wrapped format - SDK expects response.data.jobStatus + Ok(Json(GetJobStatusResponse { + job_status: JobStatus { + job_id: job.job_id.to_string(), + did: job.did, + state: state_str, + progress: Some(progress), + blob: job.blob_ref, + error: job.error, + message: job.message, + }, })) } From 7eab2317359ae0563dbfd6c97af112379b67d9fe Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 18:45:11 -0500 Subject: [PATCH 17/42] Generate proper AT Protocol CID for video blob references Previously the video service was using Bunny's UUID as the blob $link, which is invalid per AT Protocol spec. CIDs must be content-addressed hashes (e.g., bafkreibjfgx2gprinfvicegelk5kosd6y2frmqpqzwqkg7usac74l3t2v4). Changes: - Add CID generation using cid and multihash-codetable crates - Generate CIDv1 (raw codec 0x55, SHA-256) from video bytes during upload - Store video_cid in job record for later use - Use proper CID in blob reference when webhook completes job - Update video_mappings to use content CID instead of Bunny UUID This ensures video blobs have valid content-addressed identifiers that comply with the AT Protocol specification. 
--- Cargo.lock | 3 +++ rsky-video/Cargo.toml | 4 ++++ rsky-video/src/db/mod.rs | 43 ++++++++++++++++++++++++-------------- rsky-video/src/xrpc/mod.rs | 43 ++++++++++++++++++++++++++++---------- 4 files changed, 66 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6cd93d23..dec6b548 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8309,14 +8309,17 @@ dependencies = [ "base64 0.22.1", "bytes", "chrono", + "cid 0.11.1", "color-eyre", "deadpool-postgres", "futures", "jsonwebtoken", "mockito", + "multihash-codetable", "prometheus", "reqwest 0.12.23", "rsky-syntax", + "rustls 0.23.31", "serde", "serde_json", "tempfile", diff --git a/rsky-video/Cargo.toml b/rsky-video/Cargo.toml index 23843c9e..a75e8330 100644 --- a/rsky-video/Cargo.toml +++ b/rsky-video/Cargo.toml @@ -44,6 +44,10 @@ url = "2" bytes = "1" urlencoding = "2" +# CID generation +cid = "0.11" +multihash-codetable = { version = "0.1", features = ["sha2"] } + # Logging/tracing tracing = { version = "0.1", features = ["release_max_level_debug"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/rsky-video/src/db/mod.rs b/rsky-video/src/db/mod.rs index d9926076..6e4e1fbf 100644 --- a/rsky-video/src/db/mod.rs +++ b/rsky-video/src/db/mod.rs @@ -16,6 +16,7 @@ pub struct VideoJob { pub job_id: Uuid, pub did: String, pub bunny_video_id: Option, + pub video_cid: Option, pub state: String, pub progress: i32, pub blob_ref: Option, @@ -58,6 +59,7 @@ pub async fn run_migrations(pool: &Pool) -> Result<()> { job_id UUID NOT NULL UNIQUE, did TEXT NOT NULL, bunny_video_id TEXT, + video_cid TEXT, state TEXT NOT NULL DEFAULT 'JOB_STATE_CREATED', progress INTEGER DEFAULT 0, blob_ref JSONB, @@ -73,6 +75,14 @@ pub async fn run_migrations(pool: &Pool) -> Result<()> { ) .await?; + // Add video_cid column if it doesn't exist (migration for existing tables) + client + .execute( + "ALTER TABLE video_jobs ADD COLUMN IF NOT EXISTS video_cid TEXT", + &[], + ) + .await?; + // Create index 
on job_id client .execute( @@ -157,7 +167,7 @@ pub async fn create_job( r#" INSERT INTO video_jobs (job_id, did, original_filename, file_size) VALUES ($1, $2, $3, $4) - RETURNING id, job_id, did, bunny_video_id, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at + RETURNING id, job_id, did, bunny_video_id, video_cid, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at "#, &[&job_id, &did, &filename, &file_size], ) @@ -173,7 +183,7 @@ pub async fn get_job(pool: &Pool, job_id: Uuid) -> Result> { let row = client .query_opt( r#" - SELECT id, job_id, did, bunny_video_id, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at + SELECT id, job_id, did, bunny_video_id, video_cid, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at FROM video_jobs WHERE job_id = $1 "#, @@ -191,7 +201,7 @@ pub async fn get_job_by_bunny_id(pool: &Pool, bunny_video_id: &str) -> Result Result Result<()> { +/// Update job with bunny video ID and content CID +pub async fn set_bunny_video_id(pool: &Pool, job_id: Uuid, bunny_video_id: &str, video_cid: &str) -> Result<()> { let client = pool.get().await?; client .execute( r#" UPDATE video_jobs - SET bunny_video_id = $2, state = 'JOB_STATE_UPLOADING', updated_at = NOW() + SET bunny_video_id = $2, video_cid = $3, state = 'JOB_STATE_UPLOADING', updated_at = NOW() WHERE job_id = $1 "#, - &[&job_id, &bunny_video_id], + &[&job_id, &bunny_video_id, &video_cid], ) .await?; @@ -389,14 +399,15 @@ fn row_to_job(row: &tokio_postgres::Row) -> VideoJob { job_id: row.get(1), did: row.get(2), bunny_video_id: row.get(3), - state: row.get(4), - progress: row.get(5), - blob_ref: row.get(6), - error: row.get(7), - message: row.get(8), - original_filename: row.get(9), - file_size: row.get(10), - created_at: row.get(11), - updated_at: row.get(12), + video_cid: row.get(4), + state: row.get(5), + 
progress: row.get(6), + blob_ref: row.get(7), + error: row.get(8), + message: row.get(9), + original_filename: row.get(10), + file_size: row.get(11), + created_at: row.get(12), + updated_at: row.get(13), } } diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 71b42af1..1814a194 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -7,9 +7,11 @@ use axum::{ body::Body, extract::{Path, Query, State}, http::{HeaderMap, StatusCode, header}, - response::{IntoResponse, Response}, + response::Response, }; use bytes::Bytes; +use cid::Cid; +use multihash_codetable::{Code, MultihashDigest}; use serde::{Deserialize, Serialize}; use serde_json::{Value as JsonValue, json}; use tracing::{debug, error, info, warn}; @@ -22,6 +24,19 @@ use crate::{ error::{Error, Result}, }; +/// Generate a CIDv1 (raw codec, SHA-256) from video bytes +/// Returns base32-encoded CID string starting with 'b' (e.g., bafkreibjfgx...) +fn generate_video_cid(data: &[u8]) -> String { + // Create a multihash using SHA2-256 (code 0x12) + let mh = Code::Sha2_256.digest(data); + + // Create CIDv1 with raw codec (0x55) + let cid = Cid::new_v1(0x55, mh); + + // Encode as base32lower (multibase 'b' prefix) + cid.to_string() +} + /// Query parameters for getUploadLimits #[derive(Debug, Deserialize)] pub struct GetUploadLimitsParams {} @@ -190,6 +205,10 @@ pub async fn upload_video( let job_id = job.job_id; info!("Created job: {}", job_id); + // Generate CID from video content (before upload consumes the bytes) + let video_cid = generate_video_cid(&body); + info!("Generated video CID: {}", video_cid); + // Create video in Bunny Stream let title = format!("{}_{}", user_did, params.name); let bunny_video = match state.bunny_client.create_video(&title).await { @@ -202,7 +221,7 @@ pub async fn upload_video( }; let bunny_video_id = bunny_video.guid.clone(); - db::set_bunny_video_id(&state.db_pool, job_id, &bunny_video_id).await?; + db::set_bunny_video_id(&state.db_pool, 
job_id, &bunny_video_id, &video_cid).await?; // Upload video to Bunny if let Err(e) = state.bunny_client.upload_video(&bunny_video_id, body).await { @@ -324,27 +343,29 @@ pub async fn bunny_webhook( // Video encoding is complete info!("Video encoding complete: job={}", job.job_id); - // Get video info from Bunny + // Get the content CID from the job (generated during upload) + let video_cid = job.video_cid.ok_or_else(|| { + Error::Internal("Job missing video CID".to_string()) + })?; + + // Get video info from Bunny for file size let video_info = state.bunny_client.get_video(&payload.video_guid).await?; - // Create a blob ref that points to the video - // In a full implementation, we'd upload to the user's PDS here - // For MVP, we create a synthetic blob ref + // Create a blob ref with the proper content-addressed CID let blob_ref = json!({ "$type": "blob", "ref": { - "$link": payload.video_guid + "$link": video_cid }, "mimeType": "video/mp4", "size": video_info.storage_size }); - // Save the mapping for URL proxy - // The CID is the bunny video ID for now + // Save the mapping for URL proxy: (did, cid) -> bunny_video_id db::save_video_mapping( &state.db_pool, &job.did, - &payload.video_guid, + &video_cid, &payload.video_guid, ) .await?; @@ -352,7 +373,7 @@ pub async fn bunny_webhook( // Mark job as complete db::complete_job(&state.db_pool, job.job_id, blob_ref).await?; - info!("Job completed: {}", job.job_id); + info!("Job completed: job={}, cid={}", job.job_id, video_cid); } else if payload.is_failed() { // Video encoding failed error!("Video encoding failed: job={}", job.job_id); From 4ff11af1e9b85d15664ca32d688529c2169a8381 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 19:22:12 -0500 Subject: [PATCH 18/42] Upload blob to user's PDS before returning blob_ref Implements proper AT Protocol blob flow: - Video service now uploads blob to user's PDS first using forwarded service auth token - PDS returns valid blob_ref which is stored in 
video_jobs.pds_blob_ref - Client can then reference the blob in posts without BlobNotFound errors Key changes: - Add pds/ module using atrium for AT Protocol operations - Add pds_blob_ref column to video_jobs table - Extract PDS DID from token's aud claim - Resolve did:web directly, did:plc via plc.directory - Upload blob to PDS, then to Bunny for transcoding --- rsky-video/Cargo.toml | 9 +- rsky-video/src/auth/mod.rs | 34 ++++- rsky-video/src/bunny/mod.rs | 31 ++++ rsky-video/src/bunny/types.rs | 4 +- rsky-video/src/db/mod.rs | 56 +++++-- rsky-video/src/main.rs | 6 + rsky-video/src/pds/mod.rs | 277 ++++++++++++++++++++++++++++++++++ rsky-video/src/xrpc/mod.rs | 85 ++++++----- 8 files changed, 446 insertions(+), 56 deletions(-) create mode 100644 rsky-video/src/pds/mod.rs diff --git a/rsky-video/Cargo.toml b/rsky-video/Cargo.toml index a75e8330..3ab0b369 100644 --- a/rsky-video/Cargo.toml +++ b/rsky-video/Cargo.toml @@ -30,9 +30,9 @@ serde_json = { workspace = true } tokio-postgres = { version = "0.7", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } deadpool-postgres = "0.13" -# Auth / crypto / TLS +# Auth / crypto / TLS / encoding jsonwebtoken = "9" -base64 = "0.22" +base64 = { version = "0.22", features = ["std"] } rustls = { version = "0.23", default-features = false, features = ["aws_lc_rs"] } # Utilities @@ -48,6 +48,11 @@ urlencoding = "2" cid = "0.11" multihash-codetable = { version = "0.1", features = ["sha2"] } +# AT Protocol client (local atrium) +atrium-api = { path = "/Users/rudyfraser/Projects/atrium/atrium-api", features = ["agent"] } +atrium-xrpc = { path = "/Users/rudyfraser/Projects/atrium/atrium-xrpc" } +atrium-xrpc-client = { path = "/Users/rudyfraser/Projects/atrium/atrium-xrpc-client" } + # Logging/tracing tracing = { version = "0.1", features = ["release_max_level_debug"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/rsky-video/src/auth/mod.rs b/rsky-video/src/auth/mod.rs index 
7d17bafe..a3182d44 100644 --- a/rsky-video/src/auth/mod.rs +++ b/rsky-video/src/auth/mod.rs @@ -55,7 +55,7 @@ pub fn extract_auth_header(auth_header: Option<&str>) -> Result { /// For MVP, this does: /// - Decode the JWT payload /// - Check expiration -/// - Validate audience matches our service DID +/// - Optionally validate audience matches expected DID /// /// Full implementation would also: /// - Resolve the issuer's signing key from their PDS @@ -110,6 +110,38 @@ pub fn validate_service_auth( Ok(claims) } +/// Decode service auth JWT without audience validation +/// Used for uploadVideo where the token's audience is the user's PDS DID, +/// not the video service DID. The video service forwards this token to the PDS. +pub fn decode_service_auth(token: &str) -> Result { + // Split JWT into parts + let parts: Vec<&str> = token.split('.').collect(); + if parts.len() != 3 { + return Err(Error::Unauthorized("Invalid JWT format".to_string())); + } + + // Decode the payload (middle part) + let payload_bytes = URL_SAFE_NO_PAD + .decode(parts[1]) + .map_err(|e| Error::Unauthorized(format!("Failed to decode JWT payload: {}", e)))?; + + let claims: ServiceAuthClaims = serde_json::from_slice(&payload_bytes) + .map_err(|e| Error::Unauthorized(format!("Failed to parse JWT claims: {}", e)))?; + + debug!( + "Service auth (no aud check): iss={}, sub={:?}, aud={}, lxm={:?}, user_did={}", + claims.iss, claims.sub, claims.aud, claims.lxm, claims.user_did() + ); + + // Check expiration + let now = chrono::Utc::now().timestamp(); + if claims.exp < now { + return Err(Error::Unauthorized("Token has expired".to_string())); + } + + Ok(claims) +} + /// Extract the user DID from an Authorization header pub fn get_user_did(auth_header: Option<&str>, service_did: &str) -> Result { let token = extract_auth_header(auth_header)?; diff --git a/rsky-video/src/bunny/mod.rs b/rsky-video/src/bunny/mod.rs index ef0ecc11..c32e6c97 100644 --- a/rsky-video/src/bunny/mod.rs +++ 
b/rsky-video/src/bunny/mod.rs @@ -174,4 +174,35 @@ impl BunnyClient { pub fn pull_zone(&self) -> &str { &self.pull_zone } + + /// Download the original video file from Bunny CDN + /// Returns the video bytes + pub async fn download_video(&self, video_id: &str) -> Result { + // The original upload is fetched from the pull zone's b-cdn.net host at /{video_id}/original + let url = format!( + "https://{}.b-cdn.net/{}/original", + self.pull_zone, video_id + ); + + debug!("Downloading video from Bunny: {}", url); + + let response = self + .client + .get(&url) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(Error::BunnyApi(format!( + "Failed to download video: {} - {}", + status, body + ))); + } + + let bytes = response.bytes().await?; + info!("Downloaded {} bytes from Bunny", bytes.len()); + Ok(bytes) + } } diff --git a/rsky-video/src/bunny/types.rs b/rsky-video/src/bunny/types.rs index 705946d4..9d639bea 100644 --- a/rsky-video/src/bunny/types.rs +++ b/rsky-video/src/bunny/types.rs @@ -1,6 +1,6 @@ //!
Bunny Stream API types -use serde::{Deserialize, Serialize}; +use serde::Deserialize; /// Response from creating a new video #[derive(Debug, Clone, Deserialize)] @@ -70,6 +70,7 @@ impl VideoInfo { #[serde(rename_all = "PascalCase")] pub struct WebhookPayload { /// Video library ID + #[allow(dead_code)] pub video_library_id: i64, /// Video GUID pub video_guid: String, @@ -118,6 +119,7 @@ impl WebhookPayload { } /// Bunny encoding status codes +#[allow(dead_code)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(i32)] pub enum BunnyStatus { diff --git a/rsky-video/src/db/mod.rs b/rsky-video/src/db/mod.rs index 6e4e1fbf..d9a17c92 100644 --- a/rsky-video/src/db/mod.rs +++ b/rsky-video/src/db/mod.rs @@ -7,7 +7,7 @@ use serde_json::Value as JsonValue; use tracing::info; use uuid::Uuid; -use crate::error::{Error, Result}; +use crate::error::Result; /// Video job record #[derive(Debug, Clone, Serialize, Deserialize)] @@ -17,6 +17,7 @@ pub struct VideoJob { pub did: String, pub bunny_video_id: Option, pub video_cid: Option, + pub pds_blob_ref: Option, pub state: String, pub progress: i32, pub blob_ref: Option, @@ -29,6 +30,7 @@ pub struct VideoJob { } /// Job state constants +#[allow(dead_code)] pub mod job_state { pub const CREATED: &str = "JOB_STATE_CREATED"; pub const UPLOADING: &str = "JOB_STATE_UPLOADING"; @@ -38,6 +40,7 @@ pub mod job_state { } /// Upload quota record +#[allow(dead_code)] #[derive(Debug, Clone)] pub struct UploadQuota { pub did: String, @@ -83,6 +86,14 @@ pub async fn run_migrations(pool: &Pool) -> Result<()> { ) .await?; + // Add pds_blob_ref column to store the real blob reference from the PDS + client + .execute( + "ALTER TABLE video_jobs ADD COLUMN IF NOT EXISTS pds_blob_ref JSONB", + &[], + ) + .await?; + // Create index on job_id client .execute( @@ -167,7 +178,7 @@ pub async fn create_job( r#" INSERT INTO video_jobs (job_id, did, original_filename, file_size) VALUES ($1, $2, $3, $4) - RETURNING id, job_id, did, bunny_video_id, video_cid, 
state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at + RETURNING id, job_id, did, bunny_video_id, video_cid, pds_blob_ref, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at "#, &[&job_id, &did, &filename, &file_size], ) @@ -183,7 +194,7 @@ pub async fn get_job(pool: &Pool, job_id: Uuid) -> Result> { let row = client .query_opt( r#" - SELECT id, job_id, did, bunny_video_id, video_cid, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at + SELECT id, job_id, did, bunny_video_id, video_cid, pds_blob_ref, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at FROM video_jobs WHERE job_id = $1 "#, @@ -201,7 +212,7 @@ pub async fn get_job_by_bunny_id(pool: &Pool, bunny_video_id: &str) -> Result Result<()> { + let client = pool.get().await?; + + client + .execute( + r#" + UPDATE video_jobs + SET pds_blob_ref = $2, video_cid = $3, updated_at = NOW() + WHERE job_id = $1 + "#, + &[&job_id, &pds_blob_ref, &video_cid], + ) + .await?; + + Ok(()) +} + /// Update job state pub async fn update_job_state(pool: &Pool, job_id: Uuid, state: &str, progress: i32) -> Result<()> { let client = pool.get().await?; @@ -400,14 +429,15 @@ fn row_to_job(row: &tokio_postgres::Row) -> VideoJob { did: row.get(2), bunny_video_id: row.get(3), video_cid: row.get(4), - state: row.get(5), - progress: row.get(6), - blob_ref: row.get(7), - error: row.get(8), - message: row.get(9), - original_filename: row.get(10), - file_size: row.get(11), - created_at: row.get(12), - updated_at: row.get(13), + pds_blob_ref: row.get(5), + state: row.get(6), + progress: row.get(7), + blob_ref: row.get(8), + error: row.get(9), + message: row.get(10), + original_filename: row.get(11), + file_size: row.get(12), + created_at: row.get(13), + updated_at: row.get(14), } } diff --git a/rsky-video/src/main.rs b/rsky-video/src/main.rs index 47f7994e..c2d2bd52 
100644 --- a/rsky-video/src/main.rs +++ b/rsky-video/src/main.rs @@ -24,6 +24,7 @@ mod bunny; mod config; mod db; mod error; +mod pds; mod xrpc; pub use config::AppConfig; @@ -34,6 +35,7 @@ pub struct AppState { pub config: AppConfig, pub db_pool: deadpool_postgres::Pool, pub bunny_client: bunny::BunnyClient, + pub pds_client: pds::PdsClient, pub http_client: reqwest::Client, } @@ -80,11 +82,15 @@ async fn main() -> color_eyre::Result<()> { .timeout(std::time::Duration::from_secs(300)) .build()?; + // Initialize PDS client + let pds_client = pds::PdsClient::new(http_client.clone()); + // Create shared state let state = Arc::new(AppState { config: config.clone(), db_pool, bunny_client, + pds_client, http_client, }); diff --git a/rsky-video/src/pds/mod.rs b/rsky-video/src/pds/mod.rs new file mode 100644 index 00000000..41d9bfdb --- /dev/null +++ b/rsky-video/src/pds/mod.rs @@ -0,0 +1,277 @@ +//! PDS (Personal Data Server) client for uploading blobs +//! +//! Handles uploading video blobs to users' PDS instances using the service auth token +//! provided by the client. The token contains the PDS DID as the audience, which allows +//! the video service to forward the token when uploading to the PDS. 
+ +use atrium_api::types::{BlobRef, TypedBlobRef}; +use atrium_xrpc::{HttpClient, XrpcClient}; +use atrium_xrpc::http::{Request, Response}; +use atrium_xrpc::types::AuthorizationToken; +use atrium_xrpc_client::reqwest::ReqwestClient; +use base64::{Engine as _, engine::general_purpose::URL_SAFE_NO_PAD}; +use bytes::Bytes; +use serde::Deserialize; +use serde_json::Value as JsonValue; +use tracing::{debug, info}; + +use crate::error::{Error, Result}; + +/// JWT claims from service auth token +#[derive(Debug, Deserialize)] +struct ServiceAuthClaims { + /// Issuer (user's DID, signed by their PDS) + iss: String, + /// Audience (PDS DID - where the blob should be uploaded) + aud: String, + /// Subject (user's DID, optional) + #[serde(default)] + sub: Option, + /// Lexicon method + #[serde(default)] + #[allow(dead_code)] + lxm: Option, +} + +/// Response from DID document resolution +#[derive(Debug, Deserialize)] +struct DidDocument { + #[serde(default)] + service: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct DidService { + id: String, + #[serde(rename = "type")] + service_type: String, + service_endpoint: String, +} + +/// Wrapper XRPC client that uses a bearer token for auth +struct AuthenticatedClient { + token: String, + inner: ReqwestClient, +} + +impl AuthenticatedClient { + fn new(base_uri: &str, token: String) -> Self { + Self { + token, + inner: ReqwestClient::new(base_uri), + } + } +} + +impl HttpClient for AuthenticatedClient { + async fn send_http( + &self, + request: Request>, + ) -> std::result::Result>, Box> { + self.inner.send_http(request).await + } +} + +impl XrpcClient for AuthenticatedClient { + fn base_uri(&self) -> String { + self.inner.base_uri() + } + + async fn authorization_token(&self, _is_refresh: bool) -> Option { + Some(AuthorizationToken::Bearer(self.token.clone())) + } +} + +/// Client for interacting with PDS instances +pub struct PdsClient { + http_client: reqwest::Client, +} + +impl PdsClient { + 
pub fn new(http_client: reqwest::Client) -> Self { + Self { http_client } + } + + /// Decode a JWT token without verification to extract claims + /// The PDS will verify the token when we use it for upload + fn decode_token_claims(token: &str) -> Result { + let parts: Vec<&str> = token.split('.').collect(); + if parts.len() != 3 { + return Err(Error::Unauthorized("Invalid JWT format".to_string())); + } + + let payload = URL_SAFE_NO_PAD + .decode(parts[1]) + .map_err(|e| Error::Unauthorized(format!("Invalid JWT payload encoding: {}", e)))?; + + let claims: ServiceAuthClaims = serde_json::from_slice(&payload) + .map_err(|e| Error::Unauthorized(format!("Invalid JWT claims: {}", e)))?; + + Ok(claims) + } + + /// Extract the PDS DID from a service auth token + pub fn extract_pds_did(token: &str) -> Result { + let claims = Self::decode_token_claims(token)?; + Ok(claims.aud) + } + + /// Extract the user DID from a service auth token + pub fn extract_user_did(token: &str) -> Result { + let claims = Self::decode_token_claims(token)?; + // Use sub if present, otherwise use iss + Ok(claims.sub.unwrap_or(claims.iss)) + } + + /// Resolve a DID to find the PDS endpoint + pub async fn resolve_pds_endpoint(&self, did: &str) -> Result { + // For did:web, we can derive the endpoint directly from the domain + // did:web:example.com -> https://example.com + // This is the standard AT Protocol approach - the PDS endpoint is the domain itself + if did.starts_with("did:web:") { + let domain = did.strip_prefix("did:web:").unwrap(); + let endpoint = format!("https://{}", domain); + + // Optionally try to resolve the DID document for additional verification, + // but fall back to the direct endpoint if it doesn't exist + let url = format!("https://{}/.well-known/did.json", domain); + debug!("Attempting to resolve did:web via: {}", url); + + match self.http_client.get(&url).send().await { + Ok(response) if response.status().is_success() => { + if let Ok(doc) = response.json::().await { + if 
let Ok(pds_endpoint) = self.extract_pds_from_did_doc(&doc, did) { + info!("Resolved {} via DID document to: {}", did, pds_endpoint); + return Ok(pds_endpoint); + } + } + } + _ => { + // DID document not found or invalid - use direct endpoint + debug!("No DID document found for {}, using direct endpoint: {}", did, endpoint); + } + } + + // Fall back to direct endpoint derivation + info!("Using direct endpoint for {}: {}", did, endpoint); + return Ok(endpoint); + } + + // For did:plc, resolve via plc.directory + if did.starts_with("did:plc:") { + let url = format!("https://plc.directory/{}", did); + debug!("Resolving did:plc via plc.directory: {}", url); + + let response = self.http_client.get(&url).send().await?; + if !response.status().is_success() { + return Err(Error::Internal(format!( + "Failed to resolve DID {}: {}", + did, + response.status() + ))); + } + + let doc: DidDocument = response.json().await?; + return self.extract_pds_from_did_doc(&doc, did); + } + + Err(Error::Internal(format!("Unsupported DID method: {}", did))) + } + + /// Extract PDS endpoint from a DID document + fn extract_pds_from_did_doc(&self, doc: &DidDocument, did: &str) -> Result { + for service in &doc.service { + if service.id.ends_with("#atproto_pds") + && service.service_type == "AtprotoPersonalDataServer" + { + info!("Resolved {} to PDS: {}", did, service.service_endpoint); + return Ok(service.service_endpoint.clone()); + } + } + + Err(Error::Internal(format!( + "Could not find PDS endpoint for DID: {}", + did + ))) + } + + /// Upload a blob to a PDS using the provided service auth token + /// + /// The token's `aud` claim must be the PDS DID. The PDS will validate + /// the token signature before accepting the upload. 
+ /// + /// # Arguments + /// * `token` - Service auth token with PDS DID as audience + /// * `data` - The blob data to upload + /// * `mime_type` - MIME type of the blob + /// + /// # Returns + /// The blob reference from the PDS (with valid CID) + pub async fn upload_blob( + &self, + token: &str, + data: Bytes, + mime_type: &str, + ) -> Result { + // Extract PDS DID from token + let pds_did = Self::extract_pds_did(token)?; + info!("Uploading blob to PDS: {}", pds_did); + + // Resolve PDS endpoint + let pds_endpoint = self.resolve_pds_endpoint(&pds_did).await?; + debug!("PDS endpoint: {}", pds_endpoint); + + // Create authenticated client + let client = AuthenticatedClient::new(&pds_endpoint, token.to_string()); + let service = atrium_api::client::AtpServiceClient::new(client); + + // Upload blob + let size = data.len(); + debug!("Uploading {} bytes ({}) to {}", size, mime_type, pds_endpoint); + + let output = service + .service + .com + .atproto + .repo + .upload_blob(data.to_vec()) + .await + .map_err(|e| Error::Internal(format!("PDS upload failed: {}", e)))?; + + info!("Blob uploaded to PDS: size={}", size); + + Ok(output.data.blob) + } +} + +/// Extract the CID string from a BlobRef +pub fn extract_cid(blob: &BlobRef) -> Option { + match blob { + BlobRef::Typed(TypedBlobRef::Blob(b)) => Some(b.r#ref.0.to_string()), + BlobRef::Untyped(u) => Some(u.cid.clone()), + } +} + +/// Convert atrium BlobRef to JSON value for storage +pub fn blob_ref_to_json(blob: &BlobRef) -> JsonValue { + match blob { + BlobRef::Typed(TypedBlobRef::Blob(b)) => { + serde_json::json!({ + "$type": "blob", + "ref": { + "$link": b.r#ref.0.to_string() + }, + "mimeType": b.mime_type, + "size": b.size + }) + } + BlobRef::Untyped(u) => { + // Legacy format - shouldn't happen for new uploads + serde_json::json!({ + "cid": u.cid, + "mimeType": u.mime_type + }) + } + } +} diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 1814a194..3cdc79db 100644 --- 
a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -10,10 +10,8 @@ use axum::{ response::Response, }; use bytes::Bytes; -use cid::Cid; -use multihash_codetable::{Code, MultihashDigest}; use serde::{Deserialize, Serialize}; -use serde_json::{Value as JsonValue, json}; +use serde_json::Value as JsonValue; use tracing::{debug, error, info, warn}; use uuid::Uuid; @@ -22,21 +20,9 @@ use crate::{ bunny::WebhookPayload, db::{self, job_state}, error::{Error, Result}, + pds, }; -/// Generate a CIDv1 (raw codec, SHA-256) from video bytes -/// Returns base32-encoded CID string starting with 'b' (e.g., bafkreibjfgx...) -fn generate_video_cid(data: &[u8]) -> String { - // Create a multihash using SHA2-256 (code 0x12) - let mh = Code::Sha2_256.digest(data); - - // Create CIDv1 with raw codec (0x55) - let cid = Cid::new_v1(0x55, mh); - - // Encode as base32lower (multibase 'b' prefix) - cid.to_string() -} - /// Query parameters for getUploadLimits #[derive(Debug, Deserialize)] pub struct GetUploadLimitsParams {} @@ -145,14 +131,16 @@ pub async fn upload_video( Query(params): Query, body: Bytes, ) -> Result> { - // Validate service auth + // Extract service auth token let auth_header = headers .get(header::AUTHORIZATION) .and_then(|v| v.to_str().ok()); - // The token should be for com.atproto.repo.uploadBlob + // The token should be for com.atproto.repo.uploadBlob with aud: user's PDS DID + // We don't validate audience here since the token is meant for the PDS, not us. + // We forward this token to the PDS for blob upload. 
let token = auth::extract_auth_header(auth_header)?; - let claims = auth::validate_service_auth(&token, &state.config.service_did, None)?; + let claims = auth::decode_service_auth(&token)?; // Verify the DID matches if claims.user_did() != params.did { @@ -205,10 +193,36 @@ pub async fn upload_video( let job_id = job.job_id; info!("Created job: {}", job_id); - // Generate CID from video content (before upload consumes the bytes) - let video_cid = generate_video_cid(&body); - info!("Generated video CID: {}", video_cid); + // STEP 1: Upload blob to user's PDS FIRST + // The token's `aud` claim contains the PDS DID, and the PDS will validate + // the token signature. This gives us a real, content-addressed blob reference. + info!("Uploading blob to PDS for user {}", user_did); + let pds_blob_ref = match state + .pds_client + .upload_blob(&token, body.clone(), "video/mp4") + .await + { + Ok(blob) => blob, + Err(e) => { + error!("Failed to upload blob to PDS: {}", e); + db::fail_job(&state.db_pool, job_id, &format!("PDS upload failed: {}", e)).await?; + return Err(e); + } + }; + + // Extract the CID from the PDS blob_ref - this is the real content-addressed CID + let video_cid = pds::extract_cid(&pds_blob_ref).ok_or_else(|| { + Error::Internal("PDS returned invalid blob reference".to_string()) + })?; + info!("PDS returned blob with CID: {}", video_cid); + + // Convert to JSON for storage + let pds_blob_json = pds::blob_ref_to_json(&pds_blob_ref); + // Store the PDS blob_ref in database + db::set_pds_blob_ref(&state.db_pool, job_id, pds_blob_json.clone(), &video_cid).await?; + + // STEP 2: Upload to Bunny Stream for transcoding // Create video in Bunny Stream let title = format!("{}_{}", user_did, params.name); let bunny_video = match state.bunny_client.create_video(&title).await { @@ -223,7 +237,7 @@ pub async fn upload_video( let bunny_video_id = bunny_video.guid.clone(); db::set_bunny_video_id(&state.db_pool, job_id, &bunny_video_id, &video_cid).await?; - // Upload 
video to Bunny + // Upload video to Bunny for transcoding if let Err(e) = state.bunny_client.upload_video(&bunny_video_id, body).await { error!("Failed to upload to Bunny: {}", e); db::fail_job(&state.db_pool, job_id, &e.to_string()).await?; @@ -237,8 +251,8 @@ pub async fn upload_video( db::increment_quota(&state.db_pool, user_did, file_size).await?; info!( - "Video uploaded to Bunny: job={}, bunny_id={}", - job_id, bunny_video_id + "Video uploaded: job={}, cid={}, bunny_id={}", + job_id, video_cid, bunny_video_id ); // Return flat JobStatus (not wrapped) - client expects this format @@ -343,23 +357,16 @@ pub async fn bunny_webhook( // Video encoding is complete info!("Video encoding complete: job={}", job.job_id); - // Get the content CID from the job (generated during upload) + // Get the content CID from the job (from PDS upload) let video_cid = job.video_cid.ok_or_else(|| { Error::Internal("Job missing video CID".to_string()) })?; - // Get video info from Bunny for file size - let video_info = state.bunny_client.get_video(&payload.video_guid).await?; - - // Create a blob ref with the proper content-addressed CID - let blob_ref = json!({ - "$type": "blob", - "ref": { - "$link": video_cid - }, - "mimeType": "video/mp4", - "size": video_info.storage_size - }); + // Use the PDS blob_ref that was stored during upload + // This is the real blob reference from the user's PDS + let blob_ref = job.pds_blob_ref.ok_or_else(|| { + Error::Internal("Job missing PDS blob reference".to_string()) + })?; // Save the mapping for URL proxy: (did, cid) -> bunny_video_id db::save_video_mapping( @@ -370,7 +377,7 @@ pub async fn bunny_webhook( ) .await?; - // Mark job as complete + // Mark job as complete with the PDS blob_ref db::complete_job(&state.db_pool, job.job_id, blob_ref).await?; info!("Job completed: job={}, cid={}", job.job_id, video_cid); From 10026bc4be946eaecf5c1535bacccf46050571f2 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Wed, 21 Jan 2026 19:24:12 -0500 Subject: 
[PATCH 19/42] Use crates.io atrium instead of local path --- rsky-video/Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rsky-video/Cargo.toml b/rsky-video/Cargo.toml index 3ab0b369..f283a87a 100644 --- a/rsky-video/Cargo.toml +++ b/rsky-video/Cargo.toml @@ -48,10 +48,10 @@ urlencoding = "2" cid = "0.11" multihash-codetable = { version = "0.1", features = ["sha2"] } -# AT Protocol client (local atrium) -atrium-api = { path = "/Users/rudyfraser/Projects/atrium/atrium-api", features = ["agent"] } -atrium-xrpc = { path = "/Users/rudyfraser/Projects/atrium/atrium-xrpc" } -atrium-xrpc-client = { path = "/Users/rudyfraser/Projects/atrium/atrium-xrpc-client" } +# AT Protocol client (atrium from crates.io) +atrium-api = { version = "0.25", features = ["agent"] } +atrium-xrpc = "0.12" +atrium-xrpc-client = "0.5" # Logging/tracing tracing = { version = "0.1", features = ["release_max_level_debug"] } From 02b0f1d4dcc919c4439abb0f09314d75ed50b539 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 22 Jan 2026 10:05:32 -0500 Subject: [PATCH 20/42] Add service auth signing for PDS blob uploads Video service now has its own identity (did:web:video.blacksky.community) and creates service auth tokens to upload blobs to user PDSs. 
Changes: - Add signing module with K-256 JWT signing - Load signing key from SIGNING_KEY_PATH env var - Create service auth tokens with iss=video_service, aud=pds, sub=user - Update PDS client to resolve user DID to their PDS endpoint - Update Cargo.toml with k256 and sec1 dependencies --- rsky-video/Cargo.toml | 3 + rsky-video/src/config.rs | 3 + rsky-video/src/main.rs | 23 ++++++ rsky-video/src/pds/mod.rs | 52 +++++++++---- rsky-video/src/signing/mod.rs | 135 ++++++++++++++++++++++++++++++++++ rsky-video/src/xrpc/mod.rs | 11 ++- 6 files changed, 208 insertions(+), 19 deletions(-) create mode 100644 rsky-video/src/signing/mod.rs diff --git a/rsky-video/Cargo.toml b/rsky-video/Cargo.toml index f283a87a..7a533d0b 100644 --- a/rsky-video/Cargo.toml +++ b/rsky-video/Cargo.toml @@ -34,6 +34,9 @@ deadpool-postgres = "0.13" jsonwebtoken = "9" base64 = { version = "0.22", features = ["std"] } rustls = { version = "0.23", default-features = false, features = ["aws_lc_rs"] } +k256 = { version = "0.13", features = ["ecdsa", "pem", "pkcs8"] } +sec1 = { version = "0.7", features = ["pem"] } +rand = "0.8" # Utilities chrono = { version = "0.4", default-features = false, features = ["serde", "clock"] } diff --git a/rsky-video/src/config.rs b/rsky-video/src/config.rs index 859e421b..5d5e2019 100644 --- a/rsky-video/src/config.rs +++ b/rsky-video/src/config.rs @@ -24,6 +24,8 @@ pub struct AppConfig { pub service_did: String, /// Public URL of this service pub public_url: String, + /// Path to the signing key PEM file + pub signing_key_path: Option, /// Maximum video file size in bytes (default: 100MB) pub max_video_size: u64, @@ -54,6 +56,7 @@ impl AppConfig { .unwrap_or_else(|_| "did:web:video.blacksky.community".to_string()), public_url: env::var("VIDEO_PUBLIC_URL") .unwrap_or_else(|_| "https://video.blacksky.community".to_string()), + signing_key_path: env::var("SIGNING_KEY_PATH").ok(), max_video_size: env::var("MAX_VIDEO_SIZE") .ok() diff --git a/rsky-video/src/main.rs 
b/rsky-video/src/main.rs index c2d2bd52..e102a552 100644 --- a/rsky-video/src/main.rs +++ b/rsky-video/src/main.rs @@ -25,6 +25,7 @@ mod config; mod db; mod error; mod pds; +mod signing; mod xrpc; pub use config::AppConfig; @@ -37,6 +38,7 @@ pub struct AppState { pub bunny_client: bunny::BunnyClient, pub pds_client: pds::PdsClient, pub http_client: reqwest::Client, + pub signer: Option, } #[tokio::main] @@ -85,6 +87,26 @@ async fn main() -> color_eyre::Result<()> { // Initialize PDS client let pds_client = pds::PdsClient::new(http_client.clone()); + // Initialize service auth signer if key is configured + let signer = match &config.signing_key_path { + Some(path) => { + match signing::ServiceAuthSigner::from_pem_file(path, config.service_did.clone()) { + Ok(s) => { + info!("Service auth signing enabled"); + Some(s) + } + Err(e) => { + tracing::warn!("Failed to load signing key, PDS uploads will not work: {}", e); + None + } + } + } + None => { + tracing::warn!("No signing key configured (SIGNING_KEY_PATH), PDS uploads will not work"); + None + } + }; + // Create shared state let state = Arc::new(AppState { config: config.clone(), @@ -92,6 +114,7 @@ async fn main() -> color_eyre::Result<()> { bunny_client, pds_client, http_client, + signer, }); // Build router diff --git a/rsky-video/src/pds/mod.rs b/rsky-video/src/pds/mod.rs index 41d9bfdb..eea4be43 100644 --- a/rsky-video/src/pds/mod.rs +++ b/rsky-video/src/pds/mod.rs @@ -1,8 +1,8 @@ //! PDS (Personal Data Server) client for uploading blobs //! -//! Handles uploading video blobs to users' PDS instances using the service auth token -//! provided by the client. The token contains the PDS DID as the audience, which allows -//! the video service to forward the token when uploading to the PDS. +//! Handles uploading video blobs to users' PDS instances. The video service +//! creates its own service auth tokens (signed with its private key) to upload +//! blobs on behalf of users. 
use atrium_api::types::{BlobRef, TypedBlobRef}; use atrium_xrpc::{HttpClient, XrpcClient}; @@ -16,6 +16,7 @@ use serde_json::Value as JsonValue; use tracing::{debug, info}; use crate::error::{Error, Result}; +use crate::signing::ServiceAuthSigner; /// JWT claims from service auth token #[derive(Debug, Deserialize)] @@ -196,13 +197,16 @@ impl PdsClient { ))) } - /// Upload a blob to a PDS using the provided service auth token + /// Upload a blob to a PDS using a service auth token created by the video service /// - /// The token's `aud` claim must be the PDS DID. The PDS will validate - /// the token signature before accepting the upload. + /// The video service creates its own service auth token with: + /// - iss: video service DID + /// - aud: user's PDS DID + /// - sub: user's DID /// /// # Arguments - /// * `token` - Service auth token with PDS DID as audience + /// * `signer` - The service auth signer + /// * `user_did` - The user's DID /// * `data` - The blob data to upload /// * `mime_type` - MIME type of the blob /// @@ -210,25 +214,30 @@ impl PdsClient { /// The blob reference from the PDS (with valid CID) pub async fn upload_blob( &self, - token: &str, + signer: &ServiceAuthSigner, + user_did: &str, data: Bytes, + #[allow(unused_variables)] mime_type: &str, ) -> Result { - // Extract PDS DID from token - let pds_did = Self::extract_pds_did(token)?; - info!("Uploading blob to PDS: {}", pds_did); + // Resolve user's PDS endpoint from their DID + let pds_endpoint = self.resolve_pds_endpoint(user_did).await?; - // Resolve PDS endpoint - let pds_endpoint = self.resolve_pds_endpoint(&pds_did).await?; - debug!("PDS endpoint: {}", pds_endpoint); + // Derive PDS DID from endpoint (e.g., https://blacksky.app -> did:web:blacksky.app) + let pds_did = self.endpoint_to_did(&pds_endpoint)?; + info!("Uploading blob to PDS: {} ({})", pds_did, pds_endpoint); + + // Create service auth token for this PDS + let token = signer.create_pds_upload_token(&pds_did, user_did, 
None)?; + debug!("Created service auth token for PDS upload"); // Create authenticated client - let client = AuthenticatedClient::new(&pds_endpoint, token.to_string()); + let client = AuthenticatedClient::new(&pds_endpoint, token); let service = atrium_api::client::AtpServiceClient::new(client); // Upload blob let size = data.len(); - debug!("Uploading {} bytes ({}) to {}", size, mime_type, pds_endpoint); + debug!("Uploading {} bytes to {}", size, pds_endpoint); let output = service .service @@ -243,6 +252,17 @@ impl PdsClient { Ok(output.data.blob) } + + /// Convert an endpoint URL to a did:web + fn endpoint_to_did(&self, endpoint: &str) -> Result { + let url = url::Url::parse(endpoint) + .map_err(|e| Error::Internal(format!("Invalid endpoint URL: {}", e)))?; + + let host = url.host_str() + .ok_or_else(|| Error::Internal("Endpoint has no host".to_string()))?; + + Ok(format!("did:web:{}", host)) + } } /// Extract the CID string from a BlobRef diff --git a/rsky-video/src/signing/mod.rs b/rsky-video/src/signing/mod.rs new file mode 100644 index 00000000..c8928f22 --- /dev/null +++ b/rsky-video/src/signing/mod.rs @@ -0,0 +1,135 @@ +//! Service authentication token signing +//! +//! The video service needs to create service auth tokens to upload blobs +//! to users' PDS instances. This module handles loading the signing key +//! and creating properly signed JWTs. 
+ +use base64::{Engine as _, engine::general_purpose::URL_SAFE_NO_PAD}; +use k256::ecdsa::{SigningKey, Signature, signature::Signer}; +use k256::pkcs8::DecodePrivateKey; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::Path; +use tracing::{debug, info}; + +use crate::error::{Error, Result}; + +/// JWT header for ES256K (secp256k1) +#[derive(Debug, Serialize)] +struct JwtHeader { + alg: &'static str, + typ: &'static str, +} + +/// Service auth token claims +#[derive(Debug, Serialize, Deserialize)] +pub struct ServiceAuthClaims { + /// Issued at timestamp (seconds since epoch) + pub iat: i64, + /// Expiration timestamp (seconds since epoch) + pub exp: i64, + /// Issuer - the video service DID + pub iss: String, + /// Audience - the target PDS DID + pub aud: String, + /// Subject - the user's DID (on whose behalf we're acting) + pub sub: String, + /// Lexicon method being called + pub lxm: String, + /// Unique token ID + pub jti: String, +} + +/// Signer for creating service auth tokens +pub struct ServiceAuthSigner { + signing_key: SigningKey, + service_did: String, +} + +impl ServiceAuthSigner { + /// Load the signing key from a PEM file + pub fn from_pem_file>(path: P, service_did: String) -> Result { + let pem_content = fs::read_to_string(&path) + .map_err(|e| Error::Internal(format!("Failed to read signing key: {}", e)))?; + + // Parse EC private key in PEM format + // First try PKCS#8 format, then fall back to SEC1 format + let signing_key = SigningKey::from_pkcs8_pem(&pem_content) + .or_else(|_| { + // Try SEC1 format (EC PRIVATE KEY) + use k256::SecretKey; + SecretKey::from_sec1_pem(&pem_content) + .map(|sk| SigningKey::from(sk)) + }) + .map_err(|e| Error::Internal(format!("Failed to parse signing key: {}", e)))?; + + info!("Loaded signing key for {}", service_did); + + Ok(Self { + signing_key, + service_did, + }) + } + + /// Create a service auth token for uploading a blob to a PDS + /// + /// # Arguments + /// * `pds_did` - The DID of the 
target PDS + /// * `user_did` - The DID of the user on whose behalf we're acting + /// * `ttl_seconds` - How long the token should be valid (default: 300s / 5min) + pub fn create_pds_upload_token( + &self, + pds_did: &str, + user_did: &str, + ttl_seconds: Option, + ) -> Result { + let now = chrono::Utc::now().timestamp(); + let ttl = ttl_seconds.unwrap_or(300); // 5 minutes default + + let claims = ServiceAuthClaims { + iat: now, + exp: now + ttl, + iss: self.service_did.clone(), + aud: pds_did.to_string(), + sub: user_did.to_string(), + lxm: "com.atproto.repo.uploadBlob".to_string(), + jti: uuid::Uuid::new_v4().to_string(), + }; + + debug!( + "Creating service auth token: iss={}, aud={}, sub={}", + claims.iss, claims.aud, claims.sub + ); + + self.sign_jwt(&claims) + } + + /// Sign a JWT with the service's private key + fn sign_jwt(&self, claims: &ServiceAuthClaims) -> Result { + // Create header + let header = JwtHeader { + alg: "ES256K", + typ: "JWT", + }; + + // Encode header and payload + let header_json = serde_json::to_string(&header) + .map_err(|e| Error::Internal(format!("Failed to serialize header: {}", e)))?; + let claims_json = serde_json::to_string(claims) + .map_err(|e| Error::Internal(format!("Failed to serialize claims: {}", e)))?; + + let header_b64 = URL_SAFE_NO_PAD.encode(header_json.as_bytes()); + let claims_b64 = URL_SAFE_NO_PAD.encode(claims_json.as_bytes()); + + // Create signing input + let signing_input = format!("{}.{}", header_b64, claims_b64); + + // Sign with secp256k1 + let signature: Signature = self.signing_key.sign(signing_input.as_bytes()); + let sig_bytes = signature.to_bytes(); + let sig_b64 = URL_SAFE_NO_PAD.encode(&sig_bytes); + + // Combine into JWT + Ok(format!("{}.{}", signing_input, sig_b64)) + } +} diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 3cdc79db..8af32cef 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -194,12 +194,17 @@ pub async fn upload_video( 
info!("Created job: {}", job_id); // STEP 1: Upload blob to user's PDS FIRST - // The token's `aud` claim contains the PDS DID, and the PDS will validate - // the token signature. This gives us a real, content-addressed blob reference. + // We create our own service auth token (signed by video service) to upload + // to the user's PDS on their behalf. info!("Uploading blob to PDS for user {}", user_did); + + let signer = state.signer.as_ref().ok_or_else(|| { + Error::Internal("Video service signing key not configured".to_string()) + })?; + let pds_blob_ref = match state .pds_client - .upload_blob(&token, body.clone(), "video/mp4") + .upload_blob(signer, user_did, body.clone(), "video/mp4") .await { Ok(blob) => blob, From 4a036c48fec7d3c2e1cf1a1267c05fd79e4adfae Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 22 Jan 2026 10:25:45 -0500 Subject: [PATCH 21/42] Use direct HTTP for PDS blob upload instead of atrium client The atrium XRPC client was not properly forwarding the Authorization header, resulting in 'Bearer did:plc:...' being sent instead of the actual JWT token. Changed to direct reqwest HTTP calls with explicit headers for blob upload. --- rsky-video/src/pds/mod.rs | 115 ++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/rsky-video/src/pds/mod.rs b/rsky-video/src/pds/mod.rs index eea4be43..8c09ffcc 100644 --- a/rsky-video/src/pds/mod.rs +++ b/rsky-video/src/pds/mod.rs @@ -5,10 +5,6 @@ //! blobs on behalf of users. 
use atrium_api::types::{BlobRef, TypedBlobRef}; -use atrium_xrpc::{HttpClient, XrpcClient}; -use atrium_xrpc::http::{Request, Response}; -use atrium_xrpc::types::AuthorizationToken; -use atrium_xrpc_client::reqwest::ReqwestClient; use base64::{Engine as _, engine::general_purpose::URL_SAFE_NO_PAD}; use bytes::Bytes; use serde::Deserialize; @@ -50,40 +46,6 @@ struct DidService { service_endpoint: String, } -/// Wrapper XRPC client that uses a bearer token for auth -struct AuthenticatedClient { - token: String, - inner: ReqwestClient, -} - -impl AuthenticatedClient { - fn new(base_uri: &str, token: String) -> Self { - Self { - token, - inner: ReqwestClient::new(base_uri), - } - } -} - -impl HttpClient for AuthenticatedClient { - async fn send_http( - &self, - request: Request>, - ) -> std::result::Result>, Box> { - self.inner.send_http(request).await - } -} - -impl XrpcClient for AuthenticatedClient { - fn base_uri(&self) -> String { - self.inner.base_uri() - } - - async fn authorization_token(&self, _is_refresh: bool) -> Option { - Some(AuthorizationToken::Bearer(self.token.clone())) - } -} - /// Client for interacting with PDS instances pub struct PdsClient { http_client: reqwest::Client, @@ -217,7 +179,6 @@ impl PdsClient { signer: &ServiceAuthSigner, user_did: &str, data: Bytes, - #[allow(unused_variables)] mime_type: &str, ) -> Result { // Resolve user's PDS endpoint from their DID @@ -231,26 +192,70 @@ impl PdsClient { let token = signer.create_pds_upload_token(&pds_did, user_did, None)?; debug!("Created service auth token for PDS upload"); - // Create authenticated client - let client = AuthenticatedClient::new(&pds_endpoint, token); - let service = atrium_api::client::AtpServiceClient::new(client); - - // Upload blob + // Upload blob via direct HTTP request (not using atrium client) + // atrium's client has issues with the auth header for this use case + let upload_url = format!("{}/xrpc/com.atproto.repo.uploadBlob", pds_endpoint); let size = data.len(); - 
debug!("Uploading {} bytes to {}", size, pds_endpoint); - - let output = service - .service - .com - .atproto - .repo - .upload_blob(data.to_vec()) + debug!("Uploading {} bytes to {}", size, upload_url); + + let response = self.http_client + .post(&upload_url) + .header("Authorization", format!("Bearer {}", token)) + .header("Content-Type", mime_type) + .body(data.to_vec()) + .send() .await - .map_err(|e| Error::Internal(format!("PDS upload failed: {}", e)))?; + .map_err(|e| Error::Internal(format!("PDS upload request failed: {}", e)))?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(Error::Internal(format!( + "PDS upload failed: {} - {}", + status, body + ))); + } + + // Parse response + #[derive(Deserialize)] + struct UploadBlobResponse { + blob: BlobRefResponse, + } + + #[derive(Deserialize)] + #[serde(rename_all = "camelCase")] + struct BlobRefResponse { + #[serde(rename = "$type")] + blob_type: Option, + #[serde(rename = "ref")] + cid_ref: CidRef, + mime_type: String, + size: u64, + } + + #[derive(Deserialize)] + struct CidRef { + #[serde(rename = "$link")] + link: String, + } + + let upload_response: UploadBlobResponse = response.json().await + .map_err(|e| Error::Internal(format!("Failed to parse PDS response: {}", e)))?; + + info!("Blob uploaded to PDS: size={}, cid={}", size, upload_response.blob.cid_ref.link); - info!("Blob uploaded to PDS: size={}", size); + // Convert to atrium BlobRef format + // We need to construct the proper BlobRef type + let blob_ref = BlobRef::Typed(TypedBlobRef::Blob(atrium_api::types::Blob { + r#ref: atrium_api::types::CidLink( + cid::Cid::try_from(upload_response.blob.cid_ref.link.as_str()) + .map_err(|e| Error::Internal(format!("Invalid CID from PDS: {}", e)))? 
+ ), + mime_type: upload_response.blob.mime_type, + size: upload_response.blob.size as usize, + })); - Ok(output.data.blob) + Ok(blob_ref) } /// Convert an endpoint URL to a did:web From f2ad602330b710d8ae4e8ac857fbb6e5b5f5b478 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 22 Jan 2026 10:40:50 -0500 Subject: [PATCH 22/42] Forward client's service auth token to PDS instead of creating own The PDS couldn't verify tokens signed by the video service because it doesn't resolve did:web DIDs for external services. New approach: client requests service auth token with aud=pds_did (not video_service_did), and we forward that token directly to the PDS. The PDS can verify it since it's signed by the user's own signing key. --- rsky-video/src/pds/mod.rs | 26 +++++++++++--------------- rsky-video/src/xrpc/mod.rs | 12 ++++-------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/rsky-video/src/pds/mod.rs b/rsky-video/src/pds/mod.rs index 8c09ffcc..40b3f008 100644 --- a/rsky-video/src/pds/mod.rs +++ b/rsky-video/src/pds/mod.rs @@ -12,7 +12,6 @@ use serde_json::Value as JsonValue; use tracing::{debug, info}; use crate::error::{Error, Result}; -use crate::signing::ServiceAuthSigner; /// JWT claims from service auth token #[derive(Debug, Deserialize)] @@ -159,24 +158,26 @@ impl PdsClient { ))) } - /// Upload a blob to a PDS using a service auth token created by the video service + /// Upload a blob to a PDS by forwarding the client's service auth token /// - /// The video service creates its own service auth token with: - /// - iss: video service DID + /// The client provides a service auth token from their PDS with: + /// - iss: user's DID /// - aud: user's PDS DID - /// - sub: user's DID + /// - lxm: com.atproto.repo.uploadBlob + /// + /// We forward this token to the PDS, which verifies it against the user's DID document. 
/// /// # Arguments - /// * `signer` - The service auth signer + /// * `client_token` - The service auth token from the client /// * `user_did` - The user's DID /// * `data` - The blob data to upload /// * `mime_type` - MIME type of the blob /// /// # Returns /// The blob reference from the PDS (with valid CID) - pub async fn upload_blob( + pub async fn upload_blob_with_token( &self, - signer: &ServiceAuthSigner, + client_token: &str, user_did: &str, data: Bytes, mime_type: &str, @@ -184,13 +185,8 @@ impl PdsClient { // Resolve user's PDS endpoint from their DID let pds_endpoint = self.resolve_pds_endpoint(user_did).await?; - // Derive PDS DID from endpoint (e.g., https://blacksky.app -> did:web:blacksky.app) - let pds_did = self.endpoint_to_did(&pds_endpoint)?; - info!("Uploading blob to PDS: {} ({})", pds_did, pds_endpoint); - - // Create service auth token for this PDS - let token = signer.create_pds_upload_token(&pds_did, user_did, None)?; - debug!("Created service auth token for PDS upload"); + info!("Uploading blob to PDS: {} using client token", pds_endpoint); + let token = client_token; // Upload blob via direct HTTP request (not using atrium client) // atrium's client has issues with the auth header for this use case diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 8af32cef..41fc13be 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -194,17 +194,13 @@ pub async fn upload_video( info!("Created job: {}", job_id); // STEP 1: Upload blob to user's PDS FIRST - // We create our own service auth token (signed by video service) to upload - // to the user's PDS on their behalf. - info!("Uploading blob to PDS for user {}", user_did); - - let signer = state.signer.as_ref().ok_or_else(|| { - Error::Internal("Video service signing key not configured".to_string()) - })?; + // Forward the client's service auth token to the PDS. + // The token should have aud: user's PDS DID (not video service). 
+ info!("Uploading blob to PDS for user {} using client token", user_did); let pds_blob_ref = match state .pds_client - .upload_blob(signer, user_did, body.clone(), "video/mp4") + .upload_blob_with_token(&token, user_did, body.clone(), "video/mp4") .await { Ok(blob) => blob, From 1632e8f0b5111c575cd36af4ab371ad56367450d Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 22 Jan 2026 10:51:28 -0500 Subject: [PATCH 23/42] Increase video body limit to 5GB --- rsky-video/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rsky-video/src/main.rs b/rsky-video/src/main.rs index e102a552..af8ce893 100644 --- a/rsky-video/src/main.rs +++ b/rsky-video/src/main.rs @@ -144,7 +144,7 @@ async fn main() -> color_eyre::Result<()> { .route("/health", get(health_check)) .route("/_health", get(health_check)) // Add middleware - .layer(DefaultBodyLimit::max(100 * 1024 * 1024)) // 100MB for video uploads + .layer(DefaultBodyLimit::max(5 * 1024 * 1024 * 1024)) // 5GB for video uploads .layer(TraceLayer::new_for_http()) .layer( CorsLayer::new() From 777b08789d55c08883f8d94854d81e701b1b4aa4 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 09:28:05 -0500 Subject: [PATCH 24/42] Add identity, account, and sync event handling to wintermute Previously wintermute only processed #commit events from the firehose, ignoring identity changes, account status updates, and sync events. Changes: - Add IdentityData and AccountData types to FirehoseEvent - Parse #identity events and update actor handles via DID resolution - Parse #account events and update actor upstream_status - Parse #sync events and refresh handles (like identity events) - Update all tests to include new FirehoseEvent fields This fixes handle changes not being reflected in the appview. 
--- rsky-wintermute/src/indexer/tests.rs | 2 + rsky-wintermute/src/ingester/mod.rs | 292 +++++++++++++++++++++++++- rsky-wintermute/src/ingester/tests.rs | 14 ++ rsky-wintermute/src/storage.rs | 2 + rsky-wintermute/src/types.rs | 13 ++ 5 files changed, 322 insertions(+), 1 deletion(-) diff --git a/rsky-wintermute/src/indexer/tests.rs b/rsky-wintermute/src/indexer/tests.rs index 513814b6..342fc23d 100644 --- a/rsky-wintermute/src/indexer/tests.rs +++ b/rsky-wintermute/src/indexer/tests.rs @@ -1065,6 +1065,8 @@ mod indexer_tests { ], blocks: vec![], }), + identity: None, + account: None, }; // Step 2: Simulate ingester processing event (enqueue to firehose_live) diff --git a/rsky-wintermute/src/ingester/mod.rs b/rsky-wintermute/src/ingester/mod.rs index 1842da76..9d413f41 100644 --- a/rsky-wintermute/src/ingester/mod.rs +++ b/rsky-wintermute/src/ingester/mod.rs @@ -33,6 +33,7 @@ enum ConnectionResult { } #[derive(Debug)] +#[allow(clippy::large_enum_variant)] pub enum ParseResult { Event(FirehoseEvent), Skip, @@ -376,6 +377,84 @@ impl IngesterManager { .with_label_values(&["firehose_live"]) .inc(); + // Handle identity events separately (handle changes, key rotations) + if event.kind == "identity" { + let pool_clone = Arc::clone(pool); + let event_did = event.did.clone(); + let event_time = event.time.clone(); + let event_handle = event.identity.as_ref().and_then(|i| i.handle.clone()); + tokio::spawn(async move { + if let Err(e) = Self::process_identity_event( + &pool_clone, + &event_did, + &event_time, + event_handle.as_deref(), + ) + .await + { + tracing::error!( + "identity event processing failed for {}: {e}", + event_did + ); + metrics::INGESTER_ERRORS_TOTAL + .with_label_values(&["identity_failed"]) + .inc(); + } + }); + last_seq.store(event.seq, Ordering::Relaxed); + continue; + } + + // Handle account events (takedown, suspension, deletion, reactivation) + if event.kind == "account" { + if let Some(ref account) = event.account { + let pool_clone = 
Arc::clone(pool); + let event_did = event.did.clone(); + let active = account.active; + let status = account.status.clone(); + tokio::spawn(async move { + if let Err(e) = Self::process_account_event( + &pool_clone, + &event_did, + active, + status.as_deref(), + ) + .await + { + tracing::error!( + "account event processing failed for {}: {e}", + event_did + ); + metrics::INGESTER_ERRORS_TOTAL + .with_label_values(&["account_failed"]) + .inc(); + } + }); + } + last_seq.store(event.seq, Ordering::Relaxed); + continue; + } + + // Handle sync events (repo recovery - refresh handle like identity events) + if event.kind == "sync" { + let pool_clone = Arc::clone(pool); + let event_did = event.did.clone(); + let event_time = event.time.clone(); + tokio::spawn(async move { + if let Err(e) = + Self::process_identity_event(&pool_clone, &event_did, &event_time, None) + .await + { + tracing::error!("sync event processing failed for {}: {e}", event_did); + metrics::INGESTER_ERRORS_TOTAL + .with_label_values(&["sync_failed"]) + .inc(); + } + }); + last_seq.store(event.seq, Ordering::Relaxed); + continue; + } + // Process inline: parse event and spawn indexing tasks directly (skip Fjall queue) match Self::parse_event_to_jobs(&event).await { Ok(jobs) => { @@ -483,7 +562,73 @@ impl IngesterManager { return Ok(ParseResult::Skip); } - // Only process #commit messages + // Handle #identity events (handle changes, key rotations, etc) + if header.type_ == "#identity" { + let body: rsky_lexicon::com::atproto::sync::SubscribeReposIdentity = + serde_ipld_dagcbor::from_reader(&mut cursor).map_err(|e| { + WintermuteError::Serialization(format!("failed to parse identity body: {e}")) + })?; + + let event = FirehoseEvent { + seq: body.seq, + did: body.did, + time: body.time.to_rfc3339(), + kind: "identity".to_owned(), + commit: None, + identity: Some(crate::types::IdentityData { + handle: body.handle, + }), + account: None, + }; + + return Ok(ParseResult::Event(event)); + } + + // Handle #account 
events (takedown, suspension, deletion, etc.) + if header.type_ == "#account" { + let body: rsky_lexicon::com::atproto::sync::SubscribeReposAccount = + serde_ipld_dagcbor::from_reader(&mut cursor).map_err(|e| { + WintermuteError::Serialization(format!("failed to parse account body: {e}")) + })?; + + let event = FirehoseEvent { + seq: body.seq, + did: body.did, + time: body.time.to_rfc3339(), + kind: "account".to_owned(), + commit: None, + identity: None, + account: Some(crate::types::AccountData { + active: body.active, + status: body.status.map(|s| s.to_string().to_lowercase()), + }), + }; + + return Ok(ParseResult::Event(event)); + } + + // Handle #sync events (repo state recovery/updates) + if header.type_ == "#sync" { + let body: rsky_lexicon::com::atproto::sync::SubscribeReposSync = + serde_ipld_dagcbor::from_reader(&mut cursor).map_err(|e| { + WintermuteError::Serialization(format!("failed to parse sync body: {e}")) + })?; + + // Treat sync like identity - refresh the handle + let event = FirehoseEvent { + seq: body.seq, + did: body.did, + time: body.time.to_rfc3339(), + kind: "sync".to_owned(), + commit: None, + identity: None, + account: None, + }; + + return Ok(ParseResult::Event(event)); + } + + // Only process #commit messages beyond this point if header.type_ != "#commit" { return Ok(ParseResult::Skip); } @@ -515,6 +660,8 @@ impl IngesterManager { ops, blocks: body.blocks, }), + identity: None, + account: None, }; Ok(ParseResult::Event(event)) @@ -713,6 +860,149 @@ impl IngesterManager { Ok(()) } + + /// Process an identity event by resolving the DID and updating the actor table + async fn process_identity_event( + pool: &Pool, + did: &str, + timestamp: &str, + handle_hint: Option<&str>, + ) -> Result<(), WintermuteError> { + use rsky_identity::IdResolver; + use rsky_identity::types::IdentityResolverOpts; + + tracing::debug!("processing identity event for {}", did); + + // If the event includes the handle, we can use it directly + // Otherwise, resolve 
the DID to get the current handle from the DID document + let handle = if let Some(h) = handle_hint { + Some(h.to_lowercase()) + } else { + // Resolve DID to get current handle from DID document + let mut resolver = IdResolver::new(IdentityResolverOpts { + timeout: Some(std::time::Duration::from_secs(5)), + plc_url: None, + did_cache: None, + backup_nameservers: None, + }); + + match resolver.did.resolve(did.to_owned(), None).await { + Ok(Some(doc)) => { + // Extract handle from alsoKnownAs (at:// URIs) + let handle = doc.also_known_as.as_ref().and_then(|akas| { + akas.iter() + .find(|aka| aka.starts_with("at://")) + .map(|aka| aka.strip_prefix("at://").unwrap_or(aka).to_lowercase()) + }); + + if let Some(ref h) = handle { + // Verify handle resolves back to this DID + match resolver.handle.resolve(h).await { + Ok(Some(resolved_did)) if resolved_did == did => { + tracing::info!("identity event: verified handle {} for {}", h, did); + } + _ => { + tracing::debug!( + "handle {} does not resolve back to {} - setting handle to null", + h, + did + ); + return Ok(()); // Don't update if handle doesn't verify + } + } + } + + handle + } + Ok(None) => { + tracing::warn!("DID {} not found", did); + return Ok(()); + } + Err(e) => { + tracing::warn!("failed to resolve DID {}: {}", did, e); + return Ok(()); // Don't fail on resolution errors, just skip + } + } + }; + + // Update actor table + let client = pool.get().await?; + let result = client + .execute( + "UPDATE actor SET handle = $1, indexed_at = $2 WHERE did = $3", + &[&handle, &timestamp, &did], + ) + .await?; + + if result > 0 { + tracing::info!( + "updated handle for {} to {:?}", + did, + handle.as_deref().unwrap_or("null") + ); + } else { + tracing::debug!("no actor found to update for {}", did); + } + + Ok(()) + } + + /// Process an account event by updating the actor's upstream status + async fn process_account_event( + pool: &Pool, + did: &str, + active: bool, + status: Option<&str>, + ) -> Result<(), WintermuteError> { 
+ tracing::debug!( + "processing account event for {}: active={}, status={:?}", + did, + active, + status + ); + + // Determine upstream_status based on active flag and status + let upstream_status: Option<&str> = if active { + // Active accounts have no upstream status + None + } else { + // Inactive accounts: check for recognized statuses + match status { + Some(s) if ["deactivated", "suspended", "takendown", "deleted"].contains(&s) => { + Some(s) + } + Some(s) => { + tracing::warn!("unrecognized account status '{}' for {}", s, did); + Some(s) // Still store it, just log a warning + } + None => { + tracing::warn!("inactive account {} has no status", did); + None + } + } + }; + + // Update actor table + let client = pool.get().await?; + let result = client + .execute( + "UPDATE actor SET upstream_status = $1 WHERE did = $2", + &[&upstream_status, &did], + ) + .await?; + + if result > 0 { + tracing::info!( + "updated upstream_status for {} to {:?}", + did, + upstream_status.unwrap_or("null") + ); + } else { + tracing::debug!("no actor found to update status for {}", did); + } + + Ok(()) + } } async fn get_cursor_from_postgres(pool: &Pool, service: &str) -> Result { diff --git a/rsky-wintermute/src/ingester/tests.rs b/rsky-wintermute/src/ingester/tests.rs index 5f1e0646..429aad7e 100644 --- a/rsky-wintermute/src/ingester/tests.rs +++ b/rsky-wintermute/src/ingester/tests.rs @@ -263,6 +263,8 @@ mod ingester_tests { ops: vec![], blocks: vec![10, 20, 30], }), + identity: None, + account: None, }; storage.write_firehose_event(event.seq, &event).unwrap(); @@ -384,6 +386,8 @@ mod ingester_tests { ops: vec![], blocks: vec![], }), + identity: None, + account: None, }, FirehoseEvent { seq: 2, @@ -395,6 +399,8 @@ mod ingester_tests { ops: vec![], blocks: vec![], }), + identity: None, + account: None, }, FirehoseEvent { seq: 3, @@ -406,6 +412,8 @@ mod ingester_tests { ops: vec![], blocks: vec![], }), + identity: None, + account: None, }, ]; @@ -798,6 +806,8 @@ mod 
ingester_tests { ], blocks: vec![], }), + identity: None, + account: None, }; // Queue should be empty initially @@ -874,6 +884,8 @@ mod ingester_tests { ops: vec![], // No operations blocks: vec![], }), + identity: None, + account: None, }; // Should succeed but not enqueue anything @@ -895,6 +907,8 @@ mod ingester_tests { time: "2024-01-01T00:00:00Z".to_owned(), kind: "identity".to_owned(), commit: None, // No commit data + identity: None, + account: None, }; // Should succeed but not enqueue anything diff --git a/rsky-wintermute/src/storage.rs b/rsky-wintermute/src/storage.rs index d5b89e04..7b080af1 100644 --- a/rsky-wintermute/src/storage.rs +++ b/rsky-wintermute/src/storage.rs @@ -822,6 +822,8 @@ mod tests { ops: vec![], blocks: vec![], }), + identity: None, + account: None, }; storage.write_firehose_event(12345, &event).unwrap(); diff --git a/rsky-wintermute/src/types.rs b/rsky-wintermute/src/types.rs index 59110f75..0b39c454 100644 --- a/rsky-wintermute/src/types.rs +++ b/rsky-wintermute/src/types.rs @@ -51,6 +51,19 @@ pub struct FirehoseEvent { pub time: String, pub kind: String, pub commit: Option, + pub identity: Option, + pub account: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct IdentityData { + pub handle: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AccountData { + pub active: bool, + pub status: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] From 30bc19566073bc477272e4cf74b2c44a03500bbc Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 10:07:34 -0500 Subject: [PATCH 25/42] Fix profile/list/feed_generator updates being ignored Changed ON CONFLICT DO NOTHING to ON CONFLICT DO UPDATE for records that can be legitimately updated by users: - profile: displayName, description, avatarCid, bannerCid - feed_generator: displayName, description, avatarCid - list: name, description, avatarCid - starter_pack: name Also fixed batch_insert_profiles to include avatarCid and 
bannerCid columns which were previously missing. This fixes the bug where profile updates (like changing avatar/bio) were not being reflected in the appview because the original record was kept due to ON CONFLICT DO NOTHING. --- rsky-wintermute/src/indexer/mod.rs | 59 ++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index db2afe89..38c794ea 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2194,6 +2194,8 @@ impl IndexerManager { let mut creators: Vec = Vec::with_capacity(jobs.len()); let mut display_names: Vec> = Vec::with_capacity(jobs.len()); let mut descriptions: Vec> = Vec::with_capacity(jobs.len()); + let mut avatar_cids: Vec> = Vec::with_capacity(jobs.len()); + let mut banner_cids: Vec> = Vec::with_capacity(jobs.len()); let mut indexed_ats: Vec = Vec::with_capacity(jobs.len()); for pj in jobs { @@ -2201,12 +2203,26 @@ impl IndexerManager { let uri = pj.uri.to_string(); let display_name = sanitize_opt(record.get("displayName").and_then(|v| v.as_str())); let description = sanitize_opt(record.get("description").and_then(|v| v.as_str())); + let avatar_cid = record + .get("avatar") + .and_then(|v| v.get("ref")) + .and_then(|v| v.get("$link")) + .and_then(|v| v.as_str()) + .map(String::from); + let banner_cid = record + .get("banner") + .and_then(|v| v.get("ref")) + .and_then(|v| v.get("$link")) + .and_then(|v| v.as_str()) + .map(String::from); uris.push(uri); cids.push(pj.job.cid.clone()); creators.push(pj.did.clone()); display_names.push(display_name); descriptions.push(description); + avatar_cids.push(avatar_cid); + banner_cids.push(banner_cid); indexed_ats.push(pj.job.indexed_at.clone()); metrics::INDEXER_PROFILE_EVENTS_TOTAL.inc(); @@ -2215,10 +2231,16 @@ impl IndexerManager { client .execute( - "INSERT INTO profile (uri, cid, creator, \"displayName\", description, \"indexedAt\") - SELECT * FROM 
unnest($1::text[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[]) - ON CONFLICT DO NOTHING", - &[&uris, &cids, &creators, &display_names, &descriptions, &indexed_ats], + "INSERT INTO profile (uri, cid, creator, \"displayName\", description, \"avatarCid\", \"bannerCid\", \"indexedAt\") + SELECT * FROM unnest($1::text[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::text[]) + ON CONFLICT (uri) DO UPDATE SET + cid = EXCLUDED.cid, + \"displayName\" = EXCLUDED.\"displayName\", + description = EXCLUDED.description, + \"avatarCid\" = EXCLUDED.\"avatarCid\", + \"bannerCid\" = EXCLUDED.\"bannerCid\", + \"indexedAt\" = EXCLUDED.\"indexedAt\"", + &[&uris, &cids, &creators, &display_names, &descriptions, &avatar_cids, &banner_cids, &indexed_ats], ) .await?; @@ -3374,7 +3396,13 @@ impl IndexerManager { .execute( "INSERT INTO profile (uri, cid, creator, \"displayName\", description, \"avatarCid\", \"bannerCid\", \"joinedViaStarterPackUri\", \"createdAt\", \"indexedAt\") VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) - ON CONFLICT DO NOTHING", + ON CONFLICT (uri) DO UPDATE SET + cid = EXCLUDED.cid, + \"displayName\" = EXCLUDED.\"displayName\", + description = EXCLUDED.description, + \"avatarCid\" = EXCLUDED.\"avatarCid\", + \"bannerCid\" = EXCLUDED.\"bannerCid\", + \"indexedAt\" = EXCLUDED.\"indexedAt\"", &[&uri, &cid, &did, &display_name, &description, &avatar_cid, &banner_cid, &joined_via_uri, &created_at, &indexed_at], ) .await?; @@ -3443,7 +3471,13 @@ impl IndexerManager { .execute( "INSERT INTO feed_generator (uri, cid, creator, \"feedDid\", \"displayName\", description, \"descriptionFacets\", \"avatarCid\", \"createdAt\", \"indexedAt\") VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) - ON CONFLICT DO NOTHING", + ON CONFLICT (uri) DO UPDATE SET + cid = EXCLUDED.cid, + \"displayName\" = EXCLUDED.\"displayName\", + description = EXCLUDED.description, + \"descriptionFacets\" = EXCLUDED.\"descriptionFacets\", + \"avatarCid\" = 
EXCLUDED.\"avatarCid\", + \"indexedAt\" = EXCLUDED.\"indexedAt\"", &[&uri, &cid, &did, &feed_did, &display_name, &description, &description_facets, &avatar_cid, &created_at, &indexed_at], ) .await?; @@ -3496,7 +3530,13 @@ impl IndexerManager { .execute( "INSERT INTO list (uri, cid, creator, name, purpose, description, \"descriptionFacets\", \"avatarCid\", \"createdAt\", \"indexedAt\") VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) - ON CONFLICT DO NOTHING", + ON CONFLICT (uri) DO UPDATE SET + cid = EXCLUDED.cid, + name = EXCLUDED.name, + description = EXCLUDED.description, + \"descriptionFacets\" = EXCLUDED.\"descriptionFacets\", + \"avatarCid\" = EXCLUDED.\"avatarCid\", + \"indexedAt\" = EXCLUDED.\"indexedAt\"", &[&uri, &cid, &did, &name, &purpose, &description, &description_facets, &avatar_cid, &created_at, &indexed_at], ) .await?; @@ -3636,7 +3676,10 @@ impl IndexerManager { .execute( "INSERT INTO starter_pack (uri, cid, creator, name, \"createdAt\", \"indexedAt\") VALUES ($1, $2, $3, $4, $5, $6) - ON CONFLICT DO NOTHING", + ON CONFLICT (uri) DO UPDATE SET + cid = EXCLUDED.cid, + name = EXCLUDED.name, + \"indexedAt\" = EXCLUDED.\"indexedAt\"", &[&uri, &cid, &did, &name, &created_at, &indexed_at], ) .await?; From 60a943d1adf0659394115ee10625945c69e34f9a Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 11:40:07 -0500 Subject: [PATCH 26/42] Fix duplicate notifications with unique constraint reference Updated all 6 notification INSERT statements to use: ON CONFLICT (did, "recordUri", reason) DO NOTHING This prevents duplicate notifications when the same event is processed multiple times (e.g., from live firehose and backfill). 
Requires adding unique index on notification table: CREATE UNIQUE INDEX notification_unique_idx ON notification (did, "recordUri", reason); --- rsky-wintermute/src/indexer/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 38c794ea..e499511c 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2828,7 +2828,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&parent_author, &did, &uri, &cid, &"reply", &Some(parent_uri_str), &sort_at], ) .await?; @@ -3024,7 +3024,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&quoted_author, &creator, &post_uri, &post_cid, &"quote", &Some(embed_uri), &sort_at], ) .await?; @@ -3096,7 +3096,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"like", &Some(subject), &indexed_at], ) .await?; @@ -3168,7 +3168,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject, &did, &uri, &cid, &"follow", &None::, &indexed_at], ) .await?; @@ -3269,7 +3269,7 @@ impl IndexerManager { .execute( "INSERT INTO 
notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"repost", &Some(subject), &indexed_at], ) .await?; @@ -3415,7 +3415,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&starter_pack_author, &did, &uri, &cid, &"starterpack-joined", &Some(starter_pack_uri_str), &indexed_at], ) .await?; From 0555c9f193cdc9f5ef9ed366c6507437d2d9a412 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 11:56:36 -0500 Subject: [PATCH 27/42] Revert notification ON CONFLICT change - requires unique index first The ON CONFLICT (did, recordUri, reason) clause requires a unique index to exist, otherwise PostgreSQL throws an error. Reverting to ON CONFLICT DO NOTHING until the unique index can be created during a maintenance window. 
--- rsky-wintermute/src/indexer/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index e499511c..38c794ea 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2828,7 +2828,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&parent_author, &did, &uri, &cid, &"reply", &Some(parent_uri_str), &sort_at], ) .await?; @@ -3024,7 +3024,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&quoted_author, &creator, &post_uri, &post_cid, &"quote", &Some(embed_uri), &sort_at], ) .await?; @@ -3096,7 +3096,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&subject_author, &did, &uri, &cid, &"like", &Some(subject), &indexed_at], ) .await?; @@ -3168,7 +3168,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&subject, &did, &uri, &cid, &"follow", &None::, &indexed_at], ) .await?; @@ -3269,7 +3269,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, 
\"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&subject_author, &did, &uri, &cid, &"repost", &Some(subject), &indexed_at], ) .await?; @@ -3415,7 +3415,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&starter_pack_author, &did, &uri, &cid, &"starterpack-joined", &Some(starter_pack_uri_str), &indexed_at], ) .await?; From 6a5535234cf67a428f5e9f62c986d3dc257b451e Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 13:34:51 -0500 Subject: [PATCH 28/42] Fix duplicate notifications with unique constraint Added ON CONFLICT (did, recordUri, reason) DO NOTHING to all 6 notification INSERT statements. Works with the notification_unique_idx index that prevents duplicate notifications from being created. This fixes the issue where the same notification could appear multiple times with the same indexedAt timestamp due to parallel processing or retries. 
--- rsky-wintermute/src/indexer/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 38c794ea..e499511c 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2828,7 +2828,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&parent_author, &did, &uri, &cid, &"reply", &Some(parent_uri_str), &sort_at], ) .await?; @@ -3024,7 +3024,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&quoted_author, &creator, &post_uri, &post_cid, &"quote", &Some(embed_uri), &sort_at], ) .await?; @@ -3096,7 +3096,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"like", &Some(subject), &indexed_at], ) .await?; @@ -3168,7 +3168,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject, &did, &uri, &cid, &"follow", &None::, &indexed_at], ) .await?; @@ -3269,7 +3269,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO 
NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"repost", &Some(subject), &indexed_at], ) .await?; @@ -3415,7 +3415,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&starter_pack_author, &did, &uri, &cid, &"starterpack-joined", &Some(starter_pack_uri_str), &indexed_at], ) .await?; From cb67eaef03887b4934c0d047bd2569acd37cd0c7 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 14:09:33 -0500 Subject: [PATCH 29/42] Revert "Fix duplicate notifications with unique constraint" This reverts commit 6a5535234cf67a428f5e9f62c986d3dc257b451e. --- rsky-wintermute/src/indexer/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index e499511c..38c794ea 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2828,7 +2828,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&parent_author, &did, &uri, &cid, &"reply", &Some(parent_uri_str), &sort_at], ) .await?; @@ -3024,7 +3024,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&quoted_author, &creator, &post_uri, &post_cid, &"quote", &Some(embed_uri), &sort_at], ) .await?; @@ -3096,7 +3096,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", 
\"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&subject_author, &did, &uri, &cid, &"like", &Some(subject), &indexed_at], ) .await?; @@ -3168,7 +3168,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&subject, &did, &uri, &cid, &"follow", &None::, &indexed_at], ) .await?; @@ -3269,7 +3269,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&subject_author, &did, &uri, &cid, &"repost", &Some(subject), &indexed_at], ) .await?; @@ -3415,7 +3415,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + ON CONFLICT DO NOTHING", &[&starter_pack_author, &did, &uri, &cid, &"starterpack-joined", &Some(starter_pack_uri_str), &indexed_at], ) .await?; From 954e5db33d3565b234cc536021809ae37477379a Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 16:46:31 -0500 Subject: [PATCH 30/42] Fix column names in identity/account event handlers Changed snake_case to camelCase to match PostgreSQL schema: - indexed_at -> indexedAt - upstream_status -> upstreamStatus --- rsky-wintermute/src/ingester/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rsky-wintermute/src/ingester/mod.rs b/rsky-wintermute/src/ingester/mod.rs index 9d413f41..3c04bf19 100644 --- a/rsky-wintermute/src/ingester/mod.rs +++ 
b/rsky-wintermute/src/ingester/mod.rs @@ -929,7 +929,7 @@ impl IngesterManager { let client = pool.get().await?; let result = client .execute( - "UPDATE actor SET handle = $1, indexed_at = $2 WHERE did = $3", + "UPDATE actor SET handle = $1, \"indexedAt\" = $2 WHERE did = $3", &[&handle, &timestamp, &did], ) .await?; @@ -986,7 +986,7 @@ impl IngesterManager { let client = pool.get().await?; let result = client .execute( - "UPDATE actor SET upstream_status = $1 WHERE did = $2", + "UPDATE actor SET \"upstreamStatus\" = $1 WHERE did = $2", &[&upstream_status, &did], ) .await?; From 8926b7eeaf7c0437008ed54039692c3017356ddf Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 16:52:31 -0500 Subject: [PATCH 31/42] Fix actor_block ON CONFLICT to use columns instead of constraint name Changed from ON CONFLICT ON CONSTRAINT actor_block_unique_subject to ON CONFLICT (creator, subjectDid) because there are two unique constraints on the same columns and the insert was hitting the other one. --- rsky-wintermute/src/indexer/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 38c794ea..6946d1bd 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -3334,7 +3334,7 @@ impl IndexerManager { .execute( "INSERT INTO actor_block (uri, cid, creator, \"subjectDid\", \"createdAt\", \"indexedAt\") VALUES ($1, $2, $3, $4, $5, $6) - ON CONFLICT ON CONSTRAINT actor_block_unique_subject DO NOTHING", + ON CONFLICT (creator, \"subjectDid\") DO NOTHING", &[&uri, &cid, &did, &subject, &created_at, &indexed_at], ) .await?; From 36dff87d96bc70a2899e0ab90c9136fad9674f52 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Mon, 26 Jan 2026 16:55:53 -0500 Subject: [PATCH 32/42] Fix actor_block to use ON CONFLICT DO NOTHING for all constraints The table has multiple unique constraints (uri PK, plus two on creator/subjectDid). 
ON CONFLICT DO NOTHING handles conflicts on any of them. --- rsky-wintermute/src/indexer/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 6946d1bd..39938c2e 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -3334,7 +3334,7 @@ impl IndexerManager { .execute( "INSERT INTO actor_block (uri, cid, creator, \"subjectDid\", \"createdAt\", \"indexedAt\") VALUES ($1, $2, $3, $4, $5, $6) - ON CONFLICT (creator, \"subjectDid\") DO NOTHING", + ON CONFLICT DO NOTHING", &[&uri, &cid, &did, &subject, &created_at, &indexed_at], ) .await?; From c5d90d7553f63dbb335f87759e9381b979d29b20 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Tue, 27 Jan 2026 15:55:03 -0500 Subject: [PATCH 33/42] Add unique constraint columns to notification ON CONFLICT clause Changed all 6 notification INSERT statements to use ON CONFLICT (did, "recordUri", reason) DO NOTHING instead of just ON CONFLICT DO NOTHING. This requires the unique index notification_unique_idx to exist on the notification table with columns (did, "recordUri", reason). 
--- rsky-wintermute/src/indexer/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 39938c2e..29c4d4e8 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2828,7 +2828,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&parent_author, &did, &uri, &cid, &"reply", &Some(parent_uri_str), &sort_at], ) .await?; @@ -3024,7 +3024,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&quoted_author, &creator, &post_uri, &post_cid, &"quote", &Some(embed_uri), &sort_at], ) .await?; @@ -3096,7 +3096,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"like", &Some(subject), &indexed_at], ) .await?; @@ -3168,7 +3168,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject, &did, &uri, &cid, &"follow", &None::, &indexed_at], ) .await?; @@ -3269,7 +3269,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO 
NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"repost", &Some(subject), &indexed_at], ) .await?; @@ -3415,7 +3415,7 @@ impl IndexerManager { .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT DO NOTHING", + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&starter_pack_author, &did, &uri, &cid, &"starterpack-joined", &Some(starter_pack_uri_str), &indexed_at], ) .await?; From abb3cdcaf20e74261740bab1584a694e9afa13a2 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 29 Jan 2026 11:40:45 -0500 Subject: [PATCH 34/42] Add reply notification for root post author Previously only the direct parent author received a notification for replies. Bluesky also notifies the thread root author (the person who started the thread) for any reply in their thread, up to 5 levels deep. This change adds a notification to the root post author when: - The reply has a root that differs from the parent (nested reply) - The root author is not the same as the post creator This matches Bluesky's behavior where users see notifications for replies anywhere in threads they started, not just direct replies. 
--- rsky-wintermute/src/indexer/mod.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 29c4d4e8..41fed741 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2819,7 +2819,8 @@ impl IndexerManager { ) .await?; - // Generate reply notification for parent post author + // Generate reply notifications for thread participants + // Notify the parent post author if let Some(parent_uri_str) = reply_parent { if let Ok(parent_uri) = AtUri::new(parent_uri_str.to_owned(), None) { let parent_author = parent_uri.get_hostname(); @@ -2835,6 +2836,26 @@ impl IndexerManager { } } + // Also notify the root post author if different from parent + // This ensures users get notified for replies anywhere in their thread + if let Some(root_uri_str) = reply_root { + if root_uri_str != parent_uri_str { + if let Ok(root_uri) = AtUri::new(root_uri_str.to_owned(), None) { + let root_author = root_uri.get_hostname(); + if root_author != did { + client + .execute( + "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + &[&root_author, &did, &uri, &cid, &"reply", &Some(root_uri_str), &sort_at], + ) + .await?; + } + } + } + } + // Update replyCount for parent post client .execute( From 4d9ff09e66355e8e4385cbd97f62e4f5a4c92f9c Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 29 Jan 2026 13:50:41 -0500 Subject: [PATCH 35/42] Implement full Bluesky notification behavior for posts Replaces the simple parent+root reply notification with the official Bluesky behavior from post.ts notifsForInsert: 1. Mention notifications: Parse post facets and create notifications for app.bsky.richtext.facet#mention features. Previously mentions were not generating any notifications. 2. 
Reply ancestor walk: Use recursive CTE to walk up the thread ancestor chain up to REPLY_NOTIF_DEPTH (5 levels), notifying each ancestor author. This matches the official behavior where users get notified for replies anywhere in their thread, not just direct replies. 3. Descendant notifications for out-of-order indexing: When a post in the middle of a thread is indexed after its replies, notify ancestors about existing descendant replies. Uses recursive CTE to find descendants, then cross-products with ancestors where depth + height < REPLY_NOTIF_DEPTH. Deduplication is handled by ON CONFLICT (did, recordUri, reason) DO NOTHING on all notification inserts. --- rsky-wintermute/src/indexer/mod.rs | 160 ++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 28 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 41fed741..bd93a7a5 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2819,38 +2819,142 @@ impl IndexerManager { ) .await?; - // Generate reply notifications for thread participants - // Notify the parent post author + // Generate mention notifications from facets + if let Some(facets) = record.get("facets").and_then(|f| f.as_array()) { + for facet in facets { + if let Some(features) = facet.get("features").and_then(|f| f.as_array()) { + for feature in features { + let feature_type = + feature.get("$type").and_then(|t| t.as_str()).unwrap_or(""); + if feature_type == "app.bsky.richtext.facet#mention" { + if let Some(mention_did) = feature.get("did").and_then(|d| d.as_str()) { + if mention_did != did { + client + .execute( + "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"sortAt\") + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + &[ + &mention_did, &did, &uri, &cid, &"mention", + &sort_at, + ], + ) + .await?; + } + } + } + } + } + } + } + + // Reply notifications: walk ancestor chain up 
to REPLY_NOTIF_DEPTH + // Matches official Bluesky behavior from post.ts notifsForInsert if let Some(parent_uri_str) = reply_parent { - if let Ok(parent_uri) = AtUri::new(parent_uri_str.to_owned(), None) { - let parent_author = parent_uri.get_hostname(); - if parent_author != did { - client - .execute( - "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") - VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", - &[&parent_author, &did, &uri, &cid, &"reply", &Some(parent_uri_str), &sort_at], - ) - .await?; + const REPLY_NOTIF_DEPTH: i32 = 5; + + // Query ancestors using recursive CTE + // Height 0 = self, 1 = parent, 2 = grandparent, etc. + let ancestors = client + .query( + "WITH RECURSIVE ancestor(uri, ancestor_uri, height) AS ( + SELECT p.uri, p.\"replyParent\", 0 + FROM post p + WHERE p.uri = $1 + UNION ALL + SELECT p.uri, p.\"replyParent\", a.height + 1 + FROM post p + INNER JOIN ancestor a ON a.ancestor_uri = p.uri + WHERE a.height < $2 + ) + SELECT uri, height FROM ancestor", + &[&uri, &REPLY_NOTIF_DEPTH], + ) + .await?; + + // Notify each ancestor author (skip self at height 0) + for row in &ancestors { + let height: i32 = row.get(1); + if height == 0 || height >= REPLY_NOTIF_DEPTH { + continue; + } + let ancestor_uri_str: String = row.get(0); + if let Ok(ancestor_uri) = AtUri::new(ancestor_uri_str.clone(), None) { + let ancestor_author = ancestor_uri.get_hostname(); + if ancestor_author != did { + client + .execute( + "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + &[ + &ancestor_author, + &did, + &uri, + &cid, + &"reply", + &ancestor_uri_str, + &sort_at, + ], + ) + .await?; + } } } - // Also notify the root post author if different from parent - // This ensures users get notified for replies anywhere in their 
thread - if let Some(root_uri_str) = reply_root { - if root_uri_str != parent_uri_str { - if let Ok(root_uri) = AtUri::new(root_uri_str.to_owned(), None) { - let root_author = root_uri.get_hostname(); - if root_author != did { - client - .execute( - "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") - VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", - &[&root_author, &did, &uri, &cid, &"reply", &Some(root_uri_str), &sort_at], - ) - .await?; + // Descendant notifications for out-of-order indexing + // When a post in the middle of a thread is indexed after its replies, + // we notify ancestors about existing descendant replies + let descendants = client + .query( + "WITH RECURSIVE descendent(uri, depth) AS ( + SELECT p.uri, 1 + FROM post p + WHERE p.\"replyParent\" = $1 AND 1 <= $2 + UNION ALL + SELECT p.uri, d.depth + 1 + FROM post p + INNER JOIN descendent d ON d.uri = p.\"replyParent\" + WHERE d.depth < $2 + ) + SELECT d.uri, d.depth, p.cid, p.creator, p.\"sortAt\" + FROM descendent d + INNER JOIN post p ON p.uri = d.uri", + &[&uri, &REPLY_NOTIF_DEPTH], + ) + .await?; + + for desc_row in &descendants { + let desc_uri: String = desc_row.get(0); + let desc_depth: i32 = desc_row.get(1); + let desc_cid: String = desc_row.get(2); + let desc_creator: String = desc_row.get(3); + let desc_sort_at: String = desc_row.get(4); + + for anc_row in &ancestors { + let anc_height: i32 = anc_row.get(1); + if desc_depth + anc_height < REPLY_NOTIF_DEPTH { + let anc_uri: String = anc_row.get(0); + if let Ok(anc_uri_parsed) = AtUri::new(anc_uri.clone(), None) { + let anc_author = anc_uri_parsed.get_hostname(); + if anc_author != &desc_creator { + client + .execute( + "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", + &[ + &anc_author, 
+ &desc_creator, + &desc_uri, + &desc_cid, + &"reply", + &anc_uri, + &desc_sort_at, + ], + ) + .await?; + } } } } From f484a898c92e0c2dd26b80a123218b02aeb0ff2b Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 29 Jan 2026 19:58:24 -0500 Subject: [PATCH 36/42] Add logging to mention notification code path Tracing mention notification inserts to debug missing mention notifications in production. --- rsky-wintermute/src/indexer/mod.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index bd93a7a5..67f00e1d 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2829,7 +2829,11 @@ impl IndexerManager { if feature_type == "app.bsky.richtext.facet#mention" { if let Some(mention_did) = feature.get("did").and_then(|d| d.as_str()) { if mention_did != did { - client + tracing::info!( + "inserting mention notification: recipient={}, author={}, uri={}", + mention_did, did, uri + ); + let rows = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6) @@ -2840,12 +2844,20 @@ impl IndexerManager { ], ) .await?; + tracing::info!( + "mention notification result: rows_affected={}, recipient={}, uri={}", + rows, mention_did, uri + ); + } else { + tracing::debug!("skipping self-mention for {}", did); } } } } } } + } else { + tracing::debug!("no facets found for post {}", uri); } // Reply notifications: walk ancestor chain up to REPLY_NOTIF_DEPTH From 4b5638111b350e81ce38f3190970cb9587e8d8ac Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Sat, 31 Jan 2026 13:50:00 -0500 Subject: [PATCH 37/42] Fix label negation handling and add cid/exp field support The label ingestion pipeline was missing the neg field entirely, causing negation labels (neg: true) to be stored as neg: false. This meant labels that Bluesky removed were never actually negated in our database. 
Changes: - Add neg, cid, exp fields to Label and RawLabel structs - Parse neg from CBOR label messages (defaults to false if absent) - Update indexer INSERT to use actual neg value instead of hardcoded false - ON CONFLICT now updates neg, cts, and exp (matching TS dataplane) - Log negation labels at info level for visibility - Add test_label_negation and test_parse_label_message_with_negation - Fix clippy if_not_else lint in mention notification code --- Cargo.lock | 46 +++++++++- rsky-wintermute/src/bin/direct_index.rs | 10 +- rsky-wintermute/src/indexer/mod.rs | 60 ++++++++---- rsky-wintermute/src/indexer/tests.rs | 117 +++++++++++++++++++++++- rsky-wintermute/src/ingester/labels.rs | 10 +- rsky-wintermute/src/ingester/tests.rs | 50 +++++++++- rsky-wintermute/src/storage.rs | 3 + rsky-wintermute/src/types.rs | 3 + 8 files changed, 269 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dec6b548..59a234de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -417,6 +417,42 @@ dependencies = [ "trait-variant", ] +[[package]] +name = "atrium-api" +version = "0.25.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f182d9437cd447ed87eca75540151653e332d6753a2a4749d72c0f15aa1f179" +dependencies = [ + "atrium-common", + "atrium-xrpc", + "chrono", + "http 1.3.1", + "ipld-core", + "langtag", + "regex", + "serde", + "serde_bytes", + "serde_json", + "thiserror 1.0.69", + "tokio", + "trait-variant", +] + +[[package]] +name = "atrium-common" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff94b4ce3e9ba11d8bda83674e75ccaca281d5251ec3816d03e6bb23583ff4f" +dependencies = [ + "dashmap 6.1.0", + "lru 0.12.5", + "moka", + "thiserror 1.0.69", + "tokio", + "trait-variant", + "web-time", +] + [[package]] name = "atrium-xrpc" version = "0.12.3" @@ -8065,7 +8101,7 @@ name = "rsky-labeler" version = "0.1.3" dependencies = [ "anyhow", - "atrium-api", + "atrium-api 0.24.10", "atrium-xrpc-client", 
"chrono", "ciborium", @@ -8118,7 +8154,7 @@ dependencies = [ "anyhow", "argon2", "async-event-emitter", - "atrium-api", + "atrium-api 0.24.10", "atrium-xrpc-client", "aws-config", "aws-sdk-s3", @@ -8305,6 +8341,9 @@ dependencies = [ name = "rsky-video" version = "0.1.0" dependencies = [ + "atrium-api 0.25.7", + "atrium-xrpc", + "atrium-xrpc-client", "axum", "base64 0.22.1", "bytes", @@ -8314,12 +8353,15 @@ dependencies = [ "deadpool-postgres", "futures", "jsonwebtoken", + "k256", "mockito", "multihash-codetable", "prometheus", + "rand 0.8.5", "reqwest 0.12.23", "rsky-syntax", "rustls 0.23.31", + "sec1 0.7.3", "serde", "serde_json", "tempfile", diff --git a/rsky-wintermute/src/bin/direct_index.rs b/rsky-wintermute/src/bin/direct_index.rs index 7c883af9..6b5f0e34 100644 --- a/rsky-wintermute/src/bin/direct_index.rs +++ b/rsky-wintermute/src/bin/direct_index.rs @@ -5,10 +5,10 @@ use clap::Parser; use color_eyre::Result; use deadpool_postgres::{Config, ManagerConfig, RecyclingMethod, Runtime}; use iroh_car::CarReader; -use rsky_identity::types::IdentityResolverOpts; use rsky_identity::IdResolver; -use rsky_repo::storage::memory_blockstore::MemoryBlockstore; +use rsky_identity::types::IdentityResolverOpts; use rsky_repo::readable_repo::ReadableRepo; +use rsky_repo::storage::memory_blockstore::MemoryBlockstore; use rsky_syntax::aturi::AtUri; use tokio_postgres::NoTls; @@ -240,7 +240,7 @@ async fn process_did( client .execute( "INSERT INTO profile_agg (did, \"postsCount\") - SELECT $1, COUNT(*) FROM post WHERE creator = $1 + SELECT $1::varchar, COUNT(*) FROM post WHERE creator = $1::varchar ON CONFLICT (did) DO UPDATE SET \"postsCount\" = EXCLUDED.\"postsCount\"", &[&did], ) @@ -249,7 +249,7 @@ async fn process_did( client .execute( "INSERT INTO profile_agg (did, \"followsCount\") - SELECT $1, COUNT(*) FROM follow WHERE creator = $1 + SELECT $1::varchar, COUNT(*) FROM follow WHERE creator = $1::varchar ON CONFLICT (did) DO UPDATE SET \"followsCount\" = 
EXCLUDED.\"followsCount\"", &[&did], ) @@ -258,7 +258,7 @@ async fn process_did( client .execute( "INSERT INTO profile_agg (did, \"followersCount\") - SELECT $1, COUNT(*) FROM follow WHERE \"subjectDid\" = $1 + SELECT $1::varchar, COUNT(*) FROM follow WHERE \"subjectDid\" = $1::varchar ON CONFLICT (did) DO UPDATE SET \"followersCount\" = EXCLUDED.\"followersCount\"", &[&did], ) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 67f00e1d..e0a4c82e 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -2716,32 +2716,50 @@ impl IndexerManager { // Process each label in the event for label in &label_event.labels { - // Insert or update the label - // Note: Using empty string for cid since label messages don't include it - // The primary key is (src, uri, cid, val), so we use "" as cid + let cid = label.cid.as_deref().unwrap_or(""); + let exp: Option<&str> = label.exp.as_deref(); + let result = client .execute( - "INSERT INTO label (src, uri, cid, val, cts, neg) - VALUES ($1, $2, $3, $4, $5, false) + "INSERT INTO label (src, uri, cid, val, neg, cts, exp) + VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (src, uri, cid, val) DO UPDATE SET - cts = EXCLUDED.cts", - &[&label.src, &label.uri, &"", &label.val, &label.cts], + neg = EXCLUDED.neg, + cts = EXCLUDED.cts, + exp = EXCLUDED.exp", + &[ + &label.src, &label.uri, &cid, &label.val, &label.neg, &label.cts, &exp, + ], ) .await; match result { Ok(_) => { - tracing::debug!( - "indexed label: src={} uri={} val={}", + if label.neg { + tracing::info!( + "negated label: src={} uri={} val={}", + label.src, + label.uri, + label.val + ); + } else { + tracing::debug!( + "indexed label: src={} uri={} val={}", + label.src, + label.uri, + label.val + ); + } + } + Err(e) => { + tracing::error!( + "failed to insert label: src={} uri={} val={} neg={}: {e}", label.src, label.uri, - label.val + label.val, + label.neg ); - } - Err(e) => { - tracing::error!("failed 
to insert label: {e}"); metrics::INDEXER_RECORDS_FAILED_TOTAL.inc(); - // Continue processing other labels even if one fails } } } @@ -2828,10 +2846,14 @@ impl IndexerManager { feature.get("$type").and_then(|t| t.as_str()).unwrap_or(""); if feature_type == "app.bsky.richtext.facet#mention" { if let Some(mention_did) = feature.get("did").and_then(|d| d.as_str()) { - if mention_did != did { + if mention_did == did { + tracing::debug!("skipping self-mention for {}", did); + } else { tracing::info!( "inserting mention notification: recipient={}, author={}, uri={}", - mention_did, did, uri + mention_did, + did, + uri ); let rows = client .execute( @@ -2846,10 +2868,10 @@ impl IndexerManager { .await?; tracing::info!( "mention notification result: rows_affected={}, recipient={}, uri={}", - rows, mention_did, uri + rows, + mention_did, + uri ); - } else { - tracing::debug!("skipping self-mention for {}", did); } } } diff --git a/rsky-wintermute/src/indexer/tests.rs b/rsky-wintermute/src/indexer/tests.rs index 342fc23d..285d3460 100644 --- a/rsky-wintermute/src/indexer/tests.rs +++ b/rsky-wintermute/src/indexer/tests.rs @@ -492,8 +492,11 @@ mod indexer_tests { labels: vec![crate::types::Label { src: test_src.to_owned(), uri: test_uri.to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, }], }; @@ -514,7 +517,7 @@ mod indexer_tests { // Verify the label data let row = client .query_one( - "SELECT src, uri, val, cts FROM label WHERE src = $1 AND cid = ''", + "SELECT src, uri, val, cts, neg FROM label WHERE src = $1 AND cid = ''", &[&test_src], ) .await @@ -524,11 +527,13 @@ mod indexer_tests { let uri: String = row.get(1); let val: String = row.get(2); let cts: String = row.get(3); + let neg: bool = row.get(4); assert_eq!(src, test_src); assert_eq!(uri, test_uri); assert_eq!(val, "spam"); assert_eq!(cts, "2025-01-20T10:00:00Z"); + assert!(!neg, "expected neg to be false"); cleanup_test_labels(&pool, 
test_src).await; } @@ -546,20 +551,29 @@ mod indexer_tests { crate::types::Label { src: test_src.to_owned(), uri: "at://did:plc:user1/app.bsky.feed.post/post1".to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, }, crate::types::Label { src: test_src.to_owned(), uri: "at://did:plc:user2/app.bsky.feed.post/post2".to_owned(), + cid: None, val: "nsfw".to_owned(), + neg: false, cts: "2025-01-20T10:01:00Z".to_owned(), + exp: None, }, crate::types::Label { src: test_src.to_owned(), uri: "at://did:plc:user3/app.bsky.feed.post/post3".to_owned(), + cid: None, val: "porn".to_owned(), + neg: false, cts: "2025-01-20T10:02:00Z".to_owned(), + exp: None, }, ], }; @@ -626,8 +640,11 @@ mod indexer_tests { labels: vec![crate::types::Label { src: test_src.to_owned(), uri: test_uri.to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, }], }; @@ -651,8 +668,11 @@ mod indexer_tests { labels: vec![crate::types::Label { src: test_src.to_owned(), uri: test_uri.to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T11:00:00Z".to_owned(), // Different timestamp + exp: None, }], }; @@ -700,8 +720,11 @@ mod indexer_tests { labels: vec![crate::types::Label { src: test_src1.to_owned(), uri: test_uri.to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, }], }; @@ -714,8 +737,11 @@ mod indexer_tests { labels: vec![crate::types::Label { src: test_src2.to_owned(), uri: test_uri.to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:01:00Z".to_owned(), + exp: None, }], }; @@ -754,8 +780,11 @@ mod indexer_tests { labels: vec![crate::types::Label { src: test_src.to_owned(), uri: test_uri.to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, }], }; @@ -768,8 +797,11 @@ mod indexer_tests { labels: 
vec![crate::types::Label { src: test_src.to_owned(), uri: test_uri.to_owned(), + cid: None, val: "nsfw".to_owned(), + neg: false, cts: "2025-01-20T10:01:00Z".to_owned(), + exp: None, }], }; @@ -807,8 +839,11 @@ mod indexer_tests { labels: vec![crate::types::Label { src: test_src.to_owned(), uri: "at://did:plc:user/app.bsky.feed.post/roundtrip".to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, }], }; @@ -845,6 +880,86 @@ mod indexer_tests { cleanup_test_labels(&pool, test_src).await; } + #[tokio::test] + async fn test_label_negation() { + let pool = setup_test_pool(); + let test_src = "did:plc:test_labeler_negation"; + let test_uri = "did:plc:user_negation_test"; + + cleanup_test_labels(&pool, test_src).await; + + // First: apply a takedown label + let label_event1 = crate::types::LabelEvent { + seq: 8000, + labels: vec![crate::types::Label { + src: test_src.to_owned(), + uri: test_uri.to_owned(), + cid: None, + val: "!takedown".to_owned(), + neg: false, + cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, + }], + }; + + let result = IndexerManager::process_label_event(&pool, &label_event1).await; + assert!(result.is_ok()); + + // Verify label exists with neg=false + let client = pool.get().await.unwrap(); + let row = client + .query_one( + "SELECT neg FROM label WHERE src = $1 AND uri = $2 AND cid = '' AND val = '!takedown'", + &[&test_src, &test_uri], + ) + .await + .unwrap(); + let neg: bool = row.get(0); + assert!(!neg, "initial label should have neg=false"); + + // Second: negate the takedown label + let label_event2 = crate::types::LabelEvent { + seq: 8001, + labels: vec![crate::types::Label { + src: test_src.to_owned(), + uri: test_uri.to_owned(), + cid: None, + val: "!takedown".to_owned(), + neg: true, + cts: "2025-01-20T11:00:00Z".to_owned(), + exp: None, + }], + }; + + let result = IndexerManager::process_label_event(&pool, &label_event2).await; + assert!(result.is_ok()); + + // Verify 
label now has neg=true (upserted, not duplicated) + let count: i64 = client + .query_one( + "SELECT COUNT(*) FROM label WHERE src = $1 AND uri = $2 AND cid = '' AND val = '!takedown'", + &[&test_src, &test_uri], + ) + .await + .unwrap() + .get(0); + assert_eq!(count, 1, "should still be 1 row after negation"); + + let row = client + .query_one( + "SELECT neg, cts FROM label WHERE src = $1 AND uri = $2 AND cid = '' AND val = '!takedown'", + &[&test_src, &test_uri], + ) + .await + .unwrap(); + let neg: bool = row.get(0); + let cts: String = row.get(1); + assert!(neg, "neg should be true after negation"); + assert_eq!(cts, "2025-01-20T11:00:00Z", "cts should be updated"); + + cleanup_test_labels(&pool, test_src).await; + } + #[tokio::test] async fn test_label_indexing_empty_labels_array() { let pool = setup_test_pool(); diff --git a/rsky-wintermute/src/ingester/labels.rs b/rsky-wintermute/src/ingester/labels.rs index 939f2e5e..f27088dd 100644 --- a/rsky-wintermute/src/ingester/labels.rs +++ b/rsky-wintermute/src/ingester/labels.rs @@ -235,11 +235,14 @@ pub fn parse_label_message(data: &[u8]) -> Result, Wintermute struct RawLabel { src: String, uri: String, - val: String, - #[allow(dead_code)] #[serde(default)] cid: Option, + val: String, + #[serde(default)] + neg: Option, cts: String, + #[serde(default)] + exp: Option, } let mut cursor = std::io::Cursor::new(data); @@ -262,8 +265,11 @@ pub fn parse_label_message(data: &[u8]) -> Result, Wintermute .map(|raw| Label { src: raw.src, uri: raw.uri, + cid: raw.cid, val: raw.val, + neg: raw.neg.unwrap_or(false), cts: raw.cts, + exp: raw.exp, }) .collect(); diff --git a/rsky-wintermute/src/ingester/tests.rs b/rsky-wintermute/src/ingester/tests.rs index 429aad7e..a2b707a6 100644 --- a/rsky-wintermute/src/ingester/tests.rs +++ b/rsky-wintermute/src/ingester/tests.rs @@ -460,6 +460,19 @@ mod ingester_tests { fn create_label_message( seq: i64, labels: Vec<(&str, &str, &str, &str)>, // (src, uri, val, cts) + ) -> Vec { + 
create_label_message_with_neg( + seq, + labels + .into_iter() + .map(|(src, uri, val, cts)| (src, uri, val, cts, false)) + .collect(), + ) + } + + fn create_label_message_with_neg( + seq: i64, + labels: Vec<(&str, &str, &str, &str, bool)>, // (src, uri, val, cts, neg) ) -> Vec { #[derive(serde::Serialize)] struct Header { @@ -473,6 +486,8 @@ mod ingester_tests { uri: String, val: String, cts: String, + #[serde(skip_serializing_if = "std::ops::Not::not")] + neg: bool, } #[derive(serde::Serialize)] @@ -490,11 +505,12 @@ mod ingester_tests { seq, labels: labels .into_iter() - .map(|(src, uri, val, cts)| RawLabel { + .map(|(src, uri, val, cts, neg)| RawLabel { src: src.to_owned(), uri: uri.to_owned(), val: val.to_owned(), cts: cts.to_owned(), + neg, }) .collect(), }; @@ -529,6 +545,32 @@ mod ingester_tests { assert_eq!(label.uri, "at://did:plc:user456/app.bsky.feed.post/abc123"); assert_eq!(label.val, "spam"); assert_eq!(label.cts, "2025-01-20T10:30:00Z"); + assert!(!label.neg, "default neg should be false"); + } + + #[test] + fn test_parse_label_message_with_negation() { + let msg_bytes = create_label_message_with_neg( + 99999, + vec![( + "did:plc:ar7c4by46qjdydhdevvrndac", + "did:plc:user123", + "!takedown", + "2025-11-27T06:20:00Z", + true, + )], + ); + + let result = crate::ingester::labels::parse_label_message(&msg_bytes).unwrap(); + assert!(result.is_some()); + let label_event = result.unwrap(); + assert_eq!(label_event.labels.len(), 1); + + let label = &label_event.labels[0]; + assert_eq!(label.src, "did:plc:ar7c4by46qjdydhdevvrndac"); + assert_eq!(label.uri, "did:plc:user123"); + assert_eq!(label.val, "!takedown"); + assert!(label.neg, "neg should be true for negation labels"); } #[test] @@ -603,14 +645,20 @@ mod ingester_tests { crate::types::Label { src: "did:plc:labeler".to_owned(), uri: "at://did:plc:user/app.bsky.feed.post/abc".to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-20T10:00:00Z".to_owned(), + exp: None, }, 
crate::types::Label { src: "did:plc:labeler".to_owned(), uri: "at://did:plc:user/app.bsky.feed.post/def".to_owned(), + cid: None, val: "nsfw".to_owned(), + neg: false, cts: "2025-01-20T10:01:00Z".to_owned(), + exp: None, }, ], }; diff --git a/rsky-wintermute/src/storage.rs b/rsky-wintermute/src/storage.rs index 7b080af1..fd381d43 100644 --- a/rsky-wintermute/src/storage.rs +++ b/rsky-wintermute/src/storage.rs @@ -1183,8 +1183,11 @@ mod tests { labels: vec![Label { src: "did:plc:labeler".to_owned(), uri: "at://did:plc:test/app.bsky.feed.post/123".to_owned(), + cid: None, val: "spam".to_owned(), + neg: false, cts: "2025-01-01T00:00:00Z".to_owned(), + exp: None, }], }; diff --git a/rsky-wintermute/src/types.rs b/rsky-wintermute/src/types.rs index 0b39c454..6a6c4edd 100644 --- a/rsky-wintermute/src/types.rs +++ b/rsky-wintermute/src/types.rs @@ -115,6 +115,9 @@ pub struct LabelEvent { pub struct Label { pub src: String, pub uri: String, + pub cid: Option, pub val: String, + pub neg: bool, pub cts: String, + pub exp: Option, } From a5c0395849289739c8faf3df5d437aa3c1318819 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Sat, 31 Jan 2026 15:04:43 -0500 Subject: [PATCH 38/42] Fix verification indexing: use app.bsky.graph.verification instead of app.bsky.verification.proof The indexer was matching on a nonexistent collection type app.bsky.verification.proof instead of the correct app.bsky.graph.verification lexicon. This caused all verification records from the firehose to be silently ignored, leaving the verification table empty. Also fixed the URI format strings in index_verification and delete_verification to use the correct collection path. 
--- rsky-wintermute/src/indexer/mod.rs | 10 +++++----- rsky-wintermute/src/indexer/tests.rs | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index e0a4c82e..86c80f82 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -1315,7 +1315,7 @@ impl IndexerManager { ) .await?; } - "app.bsky.verification.proof" => { + "app.bsky.graph.verification" => { Self::index_verification( &client, did.as_str(), @@ -1389,7 +1389,7 @@ impl IndexerManager { "app.bsky.actor.status" => { Self::delete_status(&client, did.as_str(), rkey.as_str()).await?; } - "app.bsky.verification.proof" => { + "app.bsky.graph.verification" => { Self::delete_verification(&client, did.as_str(), rkey.as_str()).await?; } _ => {} @@ -1842,7 +1842,7 @@ impl IndexerManager { "app.bsky.actor.status" => { Self::index_status(client, did, rkey, record, cid, indexed_at).await } - "app.bsky.verification.proof" => { + "app.bsky.graph.verification" => { Self::index_verification(client, did, rkey, record, cid, indexed_at).await } _ => Ok(()), @@ -4121,7 +4121,7 @@ impl IndexerManager { indexed_at: &str, ) -> Result<(), WintermuteError> { let uri_obj = AtUri::new( - format!("at://{did}/app.bsky.verification.proof/{rkey}"), + format!("at://{did}/app.bsky.graph.verification/{rkey}"), None, ) .map_err(|e| WintermuteError::Other(format!("invalid uri: {e}")))?; @@ -4159,7 +4159,7 @@ impl IndexerManager { rkey: &str, ) -> Result<(), WintermuteError> { let uri_obj = AtUri::new( - format!("at://{did}/app.bsky.verification.proof/{rkey}"), + format!("at://{did}/app.bsky.graph.verification/{rkey}"), None, ) .map_err(|e| WintermuteError::Other(format!("invalid uri: {e}")))?; diff --git a/rsky-wintermute/src/indexer/tests.rs b/rsky-wintermute/src/indexer/tests.rs index 285d3460..7245a6db 100644 --- a/rsky-wintermute/src/indexer/tests.rs +++ b/rsky-wintermute/src/indexer/tests.rs @@ -1708,7 +1708,7 @@ mod 
indexer_tests { // Test newer collection types that were previously untested let test_collections = vec![ ( - "app.bsky.verification.proof", + "app.bsky.graph.verification", json!({ "subject": "did:plc:verified", "handle": "verified.test", From aebcaa66d4add7e51f2e07e01328777dda3fbfa4 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Thu, 5 Feb 2026 16:25:09 -0500 Subject: [PATCH 39/42] Add TODO list for community posts remaining work Tracks remaining items: - Hydration-time CID verification in client - Firehose listener for orphaned content cleanup - Community post threadgate support - Community feed aggregation - Content expiration policy --- TODO.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 00000000..dacc03a3 --- /dev/null +++ b/TODO.md @@ -0,0 +1,66 @@ +# TODO - Blacksky Community Posts + +## Completed + +- [x] Client-side CID computation before submission +- [x] Appview CID verification (expectedCid parameter) +- [x] CidMismatch error on verification failure +- [x] Rename contentHash to cid across codebase + +## Remaining Work + +### High Priority + +- [ ] **Hydration-time CID verification in client** + - Fetch stub record from user's PDS to get authoritative CID + - Fetch hydrated content from appview + - Compute CID from hydrated content + - Compare: if mismatch, warn user that content may have been tampered with + - Location: `blacksky.community/src/state/queries/community-feed.ts` + +- [ ] **Firehose listener for orphaned content cleanup** + - Listen for delete events on `community.blacksky.feed.post` collection + - When stub is deleted from PDS, delete corresponding content from appview's `community_post` table + - Prevents orphaned content when user deletes stub directly via `deleteRecord` + - Location: rsky-wintermute or separate service + +### Medium Priority + +- [ ] **Community post threadgate support** + - Implement
`community.blacksky.feed.threadgate` record type + - Allow post authors to restrict who can reply + - Rules: mentionRule, followerRule, followingRule, listRule + - Lexicon exists at: `lexicons/community/blacksky/feed/threadgate.json` + +- [ ] **Community feed aggregation** + - Global community feed (all members' posts) + - Filtered by engagement, recency, etc. + - New endpoint: `community.blacksky.feed.getCommunityTimeline` + +### Low Priority + +- [ ] **Stub record verification on post fetch** + - When fetching a community post, optionally verify stub exists in user's PDS + - Ensures post wasn't created by appview without user's consent + - Trade-off: adds latency, may not be necessary for all use cases + +- [ ] **Content expiration policy** + - Allow users to set expiration on community posts + - Auto-delete content after expiration while keeping stub as tombstone + - Useful for ephemeral content + +## Notes + +### On-Demand Hydration Pattern +The community posts follow the "on-demand record hydration" pattern: +1. Stub record in user's PDS: `{ createdAt, cid }` +2. Full content stored on appview +3. CID computed by client = source of truth for integrity +4. Appview hydrates stub with full content on fetch + +### Integrity Guarantee +The CID in the stub is a cryptographic commitment: +- Computed by CLIENT from canonical record +- Stored in user's PDS (user controls) +- If appview modifies content, CID won't match +- Clients can verify by recomputing CID from hydrated content From d04fbc5366f263ac7da774b07dd7360cb0f3af6a Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Tue, 17 Feb 2026 16:41:56 -0500 Subject: [PATCH 40/42] Use dedicated videos schema for rsky-video database tables Move all rsky-video SQL queries to reference the videos schema (videos.video_jobs, videos.upload_quotas, videos.video_mappings). Add CREATE SCHEMA IF NOT EXISTS videos to migrations. Includes cargo fmt formatting fixes across rsky-video. 
--- rsky-video/src/auth/mod.rs | 12 +++++- rsky-video/src/bunny/mod.rs | 11 +---- rsky-video/src/db/mod.rs | 77 ++++++++++++++++++----------------- rsky-video/src/main.rs | 16 ++++---- rsky-video/src/pds/mod.rs | 22 +++++++--- rsky-video/src/signing/mod.rs | 5 +-- rsky-video/src/xrpc/mod.rs | 40 +++++++++--------- 7 files changed, 99 insertions(+), 84 deletions(-) diff --git a/rsky-video/src/auth/mod.rs b/rsky-video/src/auth/mod.rs index a3182d44..d14d3e91 100644 --- a/rsky-video/src/auth/mod.rs +++ b/rsky-video/src/auth/mod.rs @@ -81,7 +81,11 @@ pub fn validate_service_auth( debug!( "Service auth: iss={}, sub={:?}, aud={}, lxm={:?}, user_did={}", - claims.iss, claims.sub, claims.aud, claims.lxm, claims.user_did() + claims.iss, + claims.sub, + claims.aud, + claims.lxm, + claims.user_did() ); // Check expiration @@ -130,7 +134,11 @@ pub fn decode_service_auth(token: &str) -> Result { debug!( "Service auth (no aud check): iss={}, sub={:?}, aud={}, lxm={:?}, user_did={}", - claims.iss, claims.sub, claims.aud, claims.lxm, claims.user_did() + claims.iss, + claims.sub, + claims.aud, + claims.lxm, + claims.user_did() ); // Check expiration diff --git a/rsky-video/src/bunny/mod.rs b/rsky-video/src/bunny/mod.rs index c32e6c97..223772ea 100644 --- a/rsky-video/src/bunny/mod.rs +++ b/rsky-video/src/bunny/mod.rs @@ -179,18 +179,11 @@ impl BunnyClient { /// Returns the video bytes pub async fn download_video(&self, video_id: &str) -> Result { // The original video is available at the CDN URL with /play.mp4 suffix - let url = format!( - "https://{}.b-cdn.net/{}/original", - self.pull_zone, video_id - ); + let url = format!("https://{}.b-cdn.net/{}/original", self.pull_zone, video_id); debug!("Downloading video from Bunny: {}", url); - let response = self - .client - .get(&url) - .send() - .await?; + let response = self.client.get(&url).send().await?; if !response.status().is_success() { let status = response.status(); diff --git a/rsky-video/src/db/mod.rs 
b/rsky-video/src/db/mod.rs index d9a17c92..446cf615 100644 --- a/rsky-video/src/db/mod.rs +++ b/rsky-video/src/db/mod.rs @@ -53,11 +53,14 @@ pub struct UploadQuota { pub async fn run_migrations(pool: &Pool) -> Result<()> { let client = pool.get().await?; - // Create video_jobs table + client + .execute("CREATE SCHEMA IF NOT EXISTS videos", &[]) + .await?; + client .execute( r#" - CREATE TABLE IF NOT EXISTS video_jobs ( + CREATE TABLE IF NOT EXISTS videos.video_jobs ( id BIGSERIAL PRIMARY KEY, job_id UUID NOT NULL UNIQUE, did TEXT NOT NULL, @@ -78,51 +81,45 @@ pub async fn run_migrations(pool: &Pool) -> Result<()> { ) .await?; - // Add video_cid column if it doesn't exist (migration for existing tables) client .execute( - "ALTER TABLE video_jobs ADD COLUMN IF NOT EXISTS video_cid TEXT", + "ALTER TABLE videos.video_jobs ADD COLUMN IF NOT EXISTS video_cid TEXT", &[], ) .await?; - // Add pds_blob_ref column to store the real blob reference from the PDS client .execute( - "ALTER TABLE video_jobs ADD COLUMN IF NOT EXISTS pds_blob_ref JSONB", + "ALTER TABLE videos.video_jobs ADD COLUMN IF NOT EXISTS pds_blob_ref JSONB", &[], ) .await?; - // Create index on job_id client .execute( - "CREATE INDEX IF NOT EXISTS idx_video_jobs_job_id ON video_jobs (job_id)", + "CREATE INDEX IF NOT EXISTS idx_video_jobs_job_id ON videos.video_jobs (job_id)", &[], ) .await?; - // Create index on bunny_video_id for webhook lookups client .execute( - "CREATE INDEX IF NOT EXISTS idx_video_jobs_bunny_video_id ON video_jobs (bunny_video_id)", + "CREATE INDEX IF NOT EXISTS idx_video_jobs_bunny_video_id ON videos.video_jobs (bunny_video_id)", &[], ) .await?; - // Create index on did for quota lookups client .execute( - "CREATE INDEX IF NOT EXISTS idx_video_jobs_did ON video_jobs (did)", + "CREATE INDEX IF NOT EXISTS idx_video_jobs_did ON videos.video_jobs (did)", &[], ) .await?; - // Create upload_quotas table client .execute( r#" - CREATE TABLE IF NOT EXISTS upload_quotas ( + CREATE TABLE IF NOT 
EXISTS videos.upload_quotas ( did TEXT PRIMARY KEY, daily_videos_used INTEGER DEFAULT 0, daily_bytes_used BIGINT DEFAULT 0, @@ -134,11 +131,10 @@ pub async fn run_migrations(pool: &Pool) -> Result<()> { ) .await?; - // Create video_mappings table for did/cid -> bunny_video_id mapping client .execute( r#" - CREATE TABLE IF NOT EXISTS video_mappings ( + CREATE TABLE IF NOT EXISTS videos.video_mappings ( id BIGSERIAL PRIMARY KEY, did TEXT NOT NULL, cid TEXT NOT NULL, @@ -151,10 +147,9 @@ pub async fn run_migrations(pool: &Pool) -> Result<()> { ) .await?; - // Create index for video mapping lookups client .execute( - "CREATE INDEX IF NOT EXISTS idx_video_mappings_did_cid ON video_mappings (did, cid)", + "CREATE INDEX IF NOT EXISTS idx_video_mappings_did_cid ON videos.video_mappings (did, cid)", &[], ) .await?; @@ -176,7 +171,7 @@ pub async fn create_job( let row = client .query_one( r#" - INSERT INTO video_jobs (job_id, did, original_filename, file_size) + INSERT INTO videos.video_jobs (job_id, did, original_filename, file_size) VALUES ($1, $2, $3, $4) RETURNING id, job_id, did, bunny_video_id, video_cid, pds_blob_ref, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at "#, @@ -195,7 +190,7 @@ pub async fn get_job(pool: &Pool, job_id: Uuid) -> Result> { .query_opt( r#" SELECT id, job_id, did, bunny_video_id, video_cid, pds_blob_ref, state, progress, blob_ref, error, message, original_filename, file_size, created_at, updated_at - FROM video_jobs + FROM videos.video_jobs WHERE job_id = $1 "#, &[&job_id], @@ -213,7 +208,7 @@ pub async fn get_job_by_bunny_id(pool: &Pool, bunny_video_id: &str) -> Result Result Result<()> { +pub async fn set_bunny_video_id( + pool: &Pool, + job_id: Uuid, + bunny_video_id: &str, + video_cid: &str, +) -> Result<()> { let client = pool.get().await?; client .execute( r#" - UPDATE video_jobs + UPDATE videos.video_jobs SET bunny_video_id = $2, video_cid = $3, state = 'JOB_STATE_UPLOADING', updated_at = 
NOW() WHERE job_id = $1 "#, @@ -242,13 +242,18 @@ pub async fn set_bunny_video_id(pool: &Pool, job_id: Uuid, bunny_video_id: &str, } /// Store the PDS blob reference (from com.atproto.repo.uploadBlob) -pub async fn set_pds_blob_ref(pool: &Pool, job_id: Uuid, pds_blob_ref: JsonValue, video_cid: &str) -> Result<()> { +pub async fn set_pds_blob_ref( + pool: &Pool, + job_id: Uuid, + pds_blob_ref: JsonValue, + video_cid: &str, +) -> Result<()> { let client = pool.get().await?; client .execute( r#" - UPDATE video_jobs + UPDATE videos.video_jobs SET pds_blob_ref = $2, video_cid = $3, updated_at = NOW() WHERE job_id = $1 "#, @@ -266,7 +271,7 @@ pub async fn update_job_state(pool: &Pool, job_id: Uuid, state: &str, progress: client .execute( r#" - UPDATE video_jobs + UPDATE videos.video_jobs SET state = $2, progress = $3, updated_at = NOW() WHERE job_id = $1 "#, @@ -284,7 +289,7 @@ pub async fn complete_job(pool: &Pool, job_id: Uuid, blob_ref: JsonValue) -> Res client .execute( r#" - UPDATE video_jobs + UPDATE videos.video_jobs SET state = 'JOB_STATE_COMPLETED', progress = 100, blob_ref = $2, updated_at = NOW() WHERE job_id = $1 "#, @@ -302,7 +307,7 @@ pub async fn fail_job(pool: &Pool, job_id: Uuid, error: &str) -> Result<()> { client .execute( r#" - UPDATE video_jobs + UPDATE videos.video_jobs SET state = 'JOB_STATE_FAILED', error = $2, updated_at = NOW() WHERE job_id = $1 "#, @@ -318,10 +323,9 @@ pub async fn get_or_create_quota(pool: &Pool, did: &str) -> Result let client = pool.get().await?; let now = Utc::now(); - // Try to get existing quota let row = client .query_opt( - "SELECT did, daily_videos_used, daily_bytes_used, quota_reset_at FROM upload_quotas WHERE did = $1", + "SELECT did, daily_videos_used, daily_bytes_used, quota_reset_at FROM videos.upload_quotas WHERE did = $1", &[&did], ) .await?; @@ -329,12 +333,10 @@ pub async fn get_or_create_quota(pool: &Pool, did: &str) -> Result if let Some(row) = row { let quota_reset_at: DateTime = row.get(3); - // Check if 
quota should be reset (new day) if now.date_naive() > quota_reset_at.date_naive() { - // Reset quota client .execute( - "UPDATE upload_quotas SET daily_videos_used = 0, daily_bytes_used = 0, quota_reset_at = $2 WHERE did = $1", + "UPDATE videos.upload_quotas SET daily_videos_used = 0, daily_bytes_used = 0, quota_reset_at = $2 WHERE did = $1", &[&did, &now], ) .await?; @@ -355,10 +357,9 @@ pub async fn get_or_create_quota(pool: &Pool, did: &str) -> Result }); } - // Create new quota record client .execute( - "INSERT INTO upload_quotas (did, quota_reset_at) VALUES ($1, $2) ON CONFLICT (did) DO NOTHING", + "INSERT INTO videos.upload_quotas (did, quota_reset_at) VALUES ($1, $2) ON CONFLICT (did) DO NOTHING", &[&did, &now], ) .await?; @@ -377,7 +378,7 @@ pub async fn increment_quota(pool: &Pool, did: &str, bytes: i64) -> Result<()> { client .execute( - "UPDATE upload_quotas SET daily_videos_used = daily_videos_used + 1, daily_bytes_used = daily_bytes_used + $2 WHERE did = $1", + "UPDATE videos.upload_quotas SET daily_videos_used = daily_videos_used + 1, daily_bytes_used = daily_bytes_used + $2 WHERE did = $1", &[&did, &bytes], ) .await?; @@ -397,7 +398,7 @@ pub async fn save_video_mapping( client .execute( r#" - INSERT INTO video_mappings (did, cid, bunny_video_id) + INSERT INTO videos.video_mappings (did, cid, bunny_video_id) VALUES ($1, $2, $3) ON CONFLICT (did, cid) DO UPDATE SET bunny_video_id = $3 "#, @@ -414,7 +415,7 @@ pub async fn get_bunny_video_id(pool: &Pool, did: &str, cid: &str) -> Result color_eyre::Result<()> { Some(s) } Err(e) => { - tracing::warn!("Failed to load signing key, PDS uploads will not work: {}", e); + tracing::warn!( + "Failed to load signing key, PDS uploads will not work: {}", + e + ); None } } } None => { - tracing::warn!("No signing key configured (SIGNING_KEY_PATH), PDS uploads will not work"); + tracing::warn!( + "No signing key configured (SIGNING_KEY_PATH), PDS uploads will not work" + ); None } }; @@ -124,10 +129,7 @@ async fn 
main() -> color_eyre::Result<()> { "/xrpc/app.bsky.video.getUploadLimits", get(xrpc::get_upload_limits), ) - .route( - "/xrpc/app.bsky.video.uploadVideo", - post(xrpc::upload_video), - ) + .route("/xrpc/app.bsky.video.uploadVideo", post(xrpc::upload_video)) .route( "/xrpc/app.bsky.video.getJobStatus", get(xrpc::get_job_status), diff --git a/rsky-video/src/pds/mod.rs b/rsky-video/src/pds/mod.rs index 40b3f008..ec40ab22 100644 --- a/rsky-video/src/pds/mod.rs +++ b/rsky-video/src/pds/mod.rs @@ -111,7 +111,10 @@ impl PdsClient { } _ => { // DID document not found or invalid - use direct endpoint - debug!("No DID document found for {}, using direct endpoint: {}", did, endpoint); + debug!( + "No DID document found for {}, using direct endpoint: {}", + did, endpoint + ); } } @@ -194,7 +197,8 @@ impl PdsClient { let size = data.len(); debug!("Uploading {} bytes to {}", size, upload_url); - let response = self.http_client + let response = self + .http_client .post(&upload_url) .header("Authorization", format!("Bearer {}", token)) .header("Content-Type", mime_type) @@ -235,17 +239,22 @@ impl PdsClient { link: String, } - let upload_response: UploadBlobResponse = response.json().await + let upload_response: UploadBlobResponse = response + .json() + .await .map_err(|e| Error::Internal(format!("Failed to parse PDS response: {}", e)))?; - info!("Blob uploaded to PDS: size={}, cid={}", size, upload_response.blob.cid_ref.link); + info!( + "Blob uploaded to PDS: size={}, cid={}", + size, upload_response.blob.cid_ref.link + ); // Convert to atrium BlobRef format // We need to construct the proper BlobRef type let blob_ref = BlobRef::Typed(TypedBlobRef::Blob(atrium_api::types::Blob { r#ref: atrium_api::types::CidLink( cid::Cid::try_from(upload_response.blob.cid_ref.link.as_str()) - .map_err(|e| Error::Internal(format!("Invalid CID from PDS: {}", e)))? 
+ .map_err(|e| Error::Internal(format!("Invalid CID from PDS: {}", e)))?, ), mime_type: upload_response.blob.mime_type, size: upload_response.blob.size as usize, @@ -259,7 +268,8 @@ impl PdsClient { let url = url::Url::parse(endpoint) .map_err(|e| Error::Internal(format!("Invalid endpoint URL: {}", e)))?; - let host = url.host_str() + let host = url + .host_str() .ok_or_else(|| Error::Internal("Endpoint has no host".to_string()))?; Ok(format!("did:web:{}", host)) diff --git a/rsky-video/src/signing/mod.rs b/rsky-video/src/signing/mod.rs index c8928f22..16c9aff2 100644 --- a/rsky-video/src/signing/mod.rs +++ b/rsky-video/src/signing/mod.rs @@ -5,7 +5,7 @@ //! and creating properly signed JWTs. use base64::{Engine as _, engine::general_purpose::URL_SAFE_NO_PAD}; -use k256::ecdsa::{SigningKey, Signature, signature::Signer}; +use k256::ecdsa::{Signature, SigningKey, signature::Signer}; use k256::pkcs8::DecodePrivateKey; use serde::{Deserialize, Serialize}; use std::fs; @@ -58,8 +58,7 @@ impl ServiceAuthSigner { .or_else(|_| { // Try SEC1 format (EC PRIVATE KEY) use k256::SecretKey; - SecretKey::from_sec1_pem(&pem_content) - .map(|sk| SigningKey::from(sk)) + SecretKey::from_sec1_pem(&pem_content).map(|sk| SigningKey::from(sk)) }) .map_err(|e| Error::Internal(format!("Failed to parse signing key: {}", e)))?; diff --git a/rsky-video/src/xrpc/mod.rs b/rsky-video/src/xrpc/mod.rs index 41fc13be..7caae8da 100644 --- a/rsky-video/src/xrpc/mod.rs +++ b/rsky-video/src/xrpc/mod.rs @@ -196,7 +196,10 @@ pub async fn upload_video( // STEP 1: Upload blob to user's PDS FIRST // Forward the client's service auth token to the PDS. // The token should have aud: user's PDS DID (not video service). 
- info!("Uploading blob to PDS for user {} using client token", user_did); + info!( + "Uploading blob to PDS for user {} using client token", + user_did + ); let pds_blob_ref = match state .pds_client @@ -212,9 +215,8 @@ pub async fn upload_video( }; // Extract the CID from the PDS blob_ref - this is the real content-addressed CID - let video_cid = pds::extract_cid(&pds_blob_ref).ok_or_else(|| { - Error::Internal("PDS returned invalid blob reference".to_string()) - })?; + let video_cid = pds::extract_cid(&pds_blob_ref) + .ok_or_else(|| Error::Internal("PDS returned invalid blob reference".to_string()))?; info!("PDS returned blob with CID: {}", video_cid); // Convert to JSON for storage @@ -359,24 +361,18 @@ pub async fn bunny_webhook( info!("Video encoding complete: job={}", job.job_id); // Get the content CID from the job (from PDS upload) - let video_cid = job.video_cid.ok_or_else(|| { - Error::Internal("Job missing video CID".to_string()) - })?; + let video_cid = job + .video_cid + .ok_or_else(|| Error::Internal("Job missing video CID".to_string()))?; // Use the PDS blob_ref that was stored during upload // This is the real blob reference from the user's PDS - let blob_ref = job.pds_blob_ref.ok_or_else(|| { - Error::Internal("Job missing PDS blob reference".to_string()) - })?; + let blob_ref = job + .pds_blob_ref + .ok_or_else(|| Error::Internal("Job missing PDS blob reference".to_string()))?; // Save the mapping for URL proxy: (did, cid) -> bunny_video_id - db::save_video_mapping( - &state.db_pool, - &job.did, - &video_cid, - &payload.video_guid, - ) - .await?; + db::save_video_mapping(&state.db_pool, &job.did, &video_cid, &payload.video_guid).await?; // Mark job as complete with the PDS blob_ref db::complete_job(&state.db_pool, job.job_id, blob_ref).await?; @@ -427,7 +423,10 @@ pub async fn proxy_playlist( } None => { // Video not in our system - fallback to Bluesky's video CDN - debug!("Video not in our DB, falling back to Bluesky CDN: did={}, cid={}", did, 
cid); + debug!( + "Video not in our DB, falling back to Bluesky CDN: did={}, cid={}", + did, cid + ); format!("https://video.bsky.app/watch/{}/{}/playlist.m3u8", did, cid) } }; @@ -460,7 +459,10 @@ pub async fn proxy_thumbnail( } None => { // Video not in our system - fallback to Bluesky's video CDN - debug!("Video not in our DB, falling back to Bluesky CDN: did={}, cid={}", did, cid); + debug!( + "Video not in our DB, falling back to Bluesky CDN: did={}, cid={}", + did, cid + ); format!("https://video.bsky.app/watch/{}/{}/thumbnail.jpg", did, cid) } }; From 2209a007a9874f7b3689b99d342a8389b19124e9 Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Fri, 20 Feb 2026 11:55:11 -0500 Subject: [PATCH 41/42] Add pinned post support to feed generator When PINNED_POST_URI is set, the configured post is inserted at position 0 of the feed on first-page requests only (no cursor). Paginated scroll requests are unaffected. Banned users continue to see only the banned notice post. --- rsky-feedgen/src/lib.rs | 1 + rsky-feedgen/src/main.rs | 1 + rsky-feedgen/src/routes.rs | 17 ++++++++++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/rsky-feedgen/src/lib.rs b/rsky-feedgen/src/lib.rs index 67737a51..3ec6becb 100644 --- a/rsky-feedgen/src/lib.rs +++ b/rsky-feedgen/src/lib.rs @@ -74,6 +74,7 @@ pub struct FeedGenConfig { pub sponsored_post_uri: String, pub sponsored_post_probability: f64, pub trending_percentile_min: f64, + pub pinned_post_uri: String, } pub mod apis; diff --git a/rsky-feedgen/src/main.rs b/rsky-feedgen/src/main.rs index a772d138..3e5ef344 100644 --- a/rsky-feedgen/src/main.rs +++ b/rsky-feedgen/src/main.rs @@ -141,6 +141,7 @@ fn rocket() -> _ { Ok(percentile) => percentile, }, }, + pinned_post_uri: env::var("PINNED_POST_URI").unwrap_or_default(), }; rocket::custom(figment) diff --git a/rsky-feedgen/src/routes.rs b/rsky-feedgen/src/routes.rs index 4365681b..85345b5c 100644 --- a/rsky-feedgen/src/routes.rs +++ b/rsky-feedgen/src/routes.rs @@ -155,7 
+155,7 @@ pub async fn index( Err(_) => eprintln!("Failed to write anonymous visitor."), } } - match feed { + let mut result = match feed { _blacksky if _blacksky == BLACKSKY && !is_banned => { match crate::apis::get_all_posts(None, limit, cursor, true, connection, config).await { Ok(response) => Ok(Json(response)), @@ -408,7 +408,22 @@ pub async fn index( Json(internal_error), )) } + }; + + // Insert pinned post at position 0 on first load only (no cursor = first page) + // Skip for banned users who should only see the banned notice + if cursor.is_none() && !is_banned && !config.pinned_post_uri.is_empty() { + if let Ok(ref mut response) = result { + response.feed.insert( + 0, + crate::models::PostResult { + post: config.pinned_post_uri.clone(), + }, + ); + } } + + result } #[rocket::put("/cursor?&")] From 1ef0691f933b896eabe80804402697cc56e0ca8b Mon Sep 17 00:00:00 2001 From: Rudy Fraser Date: Tue, 24 Feb 2026 17:34:00 -0500 Subject: [PATCH 42/42] Make notification INSERT failures non-fatal in indexer Notification inserts were using .await? which caused the entire indexing function to bail out on failure. This prevented post_agg, profile_agg, and feed_agg updates from running. Changed all 8 notification INSERT sites to log warnings instead of propagating errors, ensuring aggregate count updates always complete. Root cause: notification_id_seq hit 32-bit integer max (2147483647), causing all notification inserts to fail and cascading to block all aggregate count updates across the appview. 
--- rsky-wintermute/src/indexer/mod.rs | 128 ++++++++++++++++++++++++----- 1 file changed, 106 insertions(+), 22 deletions(-) diff --git a/rsky-wintermute/src/indexer/mod.rs b/rsky-wintermute/src/indexer/mod.rs index 86c80f82..e206e7bd 100644 --- a/rsky-wintermute/src/indexer/mod.rs +++ b/rsky-wintermute/src/indexer/mod.rs @@ -1326,6 +1326,16 @@ impl IndexerManager { ) .await?; } + "community.blacksky.feed.post" => { + Self::index_community_post_stub( + &client, + did.as_str(), + rkey.as_str(), + &job.cid, + &job.indexed_at, + ) + .await?; + } _ => {} } } @@ -1392,6 +1402,9 @@ impl IndexerManager { "app.bsky.graph.verification" => { Self::delete_verification(&client, did.as_str(), rkey.as_str()).await?; } + "community.blacksky.feed.post" => { + Self::delete_community_post(&client, did.as_str(), rkey.as_str()).await?; + } _ => {} } } @@ -1845,6 +1858,9 @@ impl IndexerManager { "app.bsky.graph.verification" => { Self::index_verification(client, did, rkey, record, cid, indexed_at).await } + "community.blacksky.feed.post" => { + Self::index_community_post_stub(client, did, rkey, cid, indexed_at).await + } _ => Ok(()), } } @@ -2855,7 +2871,7 @@ impl IndexerManager { did, uri ); - let rows = client + match client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6) @@ -2865,13 +2881,16 @@ impl IndexerManager { &sort_at, ], ) - .await?; - tracing::info!( - "mention notification result: rows_affected={}, recipient={}, uri={}", - rows, - mention_did, - uri - ); + .await + { + Ok(rows) => tracing::info!( + "mention notification result: rows_affected={}, recipient={}, uri={}", + rows, + mention_did, + uri + ), + Err(e) => tracing::warn!("failed to insert mention notification for {uri}: {e}"), + } } } } @@ -2916,7 +2935,7 @@ impl IndexerManager { if let Ok(ancestor_uri) = AtUri::new(ancestor_uri_str.clone(), None) { let ancestor_author = ancestor_uri.get_hostname(); if ancestor_author != did { - 
client + if let Err(e) = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) @@ -2931,7 +2950,10 @@ impl IndexerManager { &sort_at, ], ) - .await?; + .await + { + tracing::warn!("failed to insert reply notification for {uri}: {e}"); + } } } } @@ -2972,7 +2994,7 @@ impl IndexerManager { if let Ok(anc_uri_parsed) = AtUri::new(anc_uri.clone(), None) { let anc_author = anc_uri_parsed.get_hostname(); if anc_author != &desc_creator { - client + if let Err(e) = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) @@ -2987,7 +3009,10 @@ impl IndexerManager { &desc_sort_at, ], ) - .await?; + .await + { + tracing::warn!("failed to insert reply notification for {desc_uri}: {e}"); + } } } } @@ -3179,14 +3204,17 @@ impl IndexerManager { } else { created_at }; - client + if let Err(e) = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &["ed_author, &creator, &post_uri, &post_cid, &"quote", &Some(embed_uri), &sort_at], ) - .await?; + .await + { + tracing::warn!("failed to insert quote notification for {post_uri}: {e}"); + } } } } @@ -3212,6 +3240,50 @@ impl IndexerManager { Ok(()) } + /// Index a community post stub arriving from the firehose. + /// The full content was already stored via community.blacksky.feed.submitPost XRPC. + /// This just updates the CID on the existing `community_post` row. 
+ async fn index_community_post_stub( + client: &deadpool_postgres::Client, + did: &str, + rkey: &str, + cid: &str, + indexed_at: &str, + ) -> Result<(), WintermuteError> { + let uri = format!("at://{did}/community.blacksky.feed.post/{rkey}"); + + let rows = client + .execute( + "UPDATE community_post SET cid = $1, \"indexedAt\" = $2 WHERE uri = $3", + &[&cid, &indexed_at, &uri], + ) + .await?; + + if rows == 0 { + tracing::debug!( + "community post stub for {} not found in community_post table (content not yet submitted)", + uri + ); + } else { + tracing::info!("updated community post stub cid for {}", uri); + } + + Ok(()) + } + + async fn delete_community_post( + client: &deadpool_postgres::Client, + did: &str, + rkey: &str, + ) -> Result<(), WintermuteError> { + let uri = format!("at://{did}/community.blacksky.feed.post/{rkey}"); + client + .execute("DELETE FROM community_post WHERE uri = $1", &[&uri]) + .await?; + tracing::info!("deleted community post {}", uri); + Ok(()) + } + async fn index_like( client: &deadpool_postgres::Client, did: &str, @@ -3251,14 +3323,17 @@ impl IndexerManager { if let Ok(subject_uri) = AtUri::new(subject.to_owned(), None) { let subject_author = subject_uri.get_hostname(); if subject_author != did { - client + if let Err(e) = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"like", &Some(subject), &indexed_at], ) - .await?; + .await + { + tracing::warn!("failed to insert like notification for {uri}: {e}"); + } } } } @@ -3323,14 +3398,17 @@ impl IndexerManager { .await?; if row_count > 0 { - client + if let Err(e) = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", 
&[&subject, &did, &uri, &cid, &"follow", &None::, &indexed_at], ) - .await?; + .await + { + tracing::warn!("failed to insert follow notification for {uri}: {e}"); + } } client @@ -3424,14 +3502,17 @@ impl IndexerManager { if let Ok(subject_uri) = AtUri::new(subject.to_owned(), None) { let subject_author = subject_uri.get_hostname(); if subject_author != did { - client + if let Err(e) = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&subject_author, &did, &uri, &cid, &"repost", &Some(subject), &indexed_at], ) - .await?; + .await + { + tracing::warn!("failed to insert repost notification for {uri}: {e}"); + } } } } @@ -3570,14 +3651,17 @@ impl IndexerManager { if let Some(starter_pack_uri_str) = joined_via_uri { if let Ok(starter_pack_uri) = AtUri::new(starter_pack_uri_str.to_owned(), None) { let starter_pack_author = starter_pack_uri.get_hostname(); - client + if let Err(e) = client .execute( "INSERT INTO notification (did, author, \"recordUri\", \"recordCid\", reason, \"reasonSubject\", \"sortAt\") VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (did, \"recordUri\", reason) DO NOTHING", &[&starter_pack_author, &did, &uri, &cid, &"starterpack-joined", &Some(starter_pack_uri_str), &indexed_at], ) - .await?; + .await + { + tracing::warn!("failed to insert starterpack-joined notification for {uri}: {e}"); + } } } }