Commit c24e2bf
WIP add partial tile cleanup utility
The tlog-tiles and static-ct-api specs allow partial tiles to be deleted when the corresponding full tile is available. This helps to reduce R2 storage costs, but will incur extra cost for the R2 list and delete operations.

There are some limitations with the current cron job approach:

* Workers have a 1000-subrequest limit, so log cleanup needs to be broken up into many invocations, saving state in between.
* Cron jobs can only be triggered once a minute, so cleaning up a log could be slow.
* A single cron job is responsible for cleaning up all logs.
* The workers-rs Rust bindings currently don't support deleting multiple keys at once from a bucket, so we could quickly hit subrequest limits.

TODO: Switch to a Durable Object to manage log cleanup. This has some nice benefits:

* Saving/loading state is easy with DO storage.
* DO alarms can be scheduled for immediate execution, so we aren't wasting time in between invocations.
* We can have one cleanup DO per log for better parallelism.
* We can easily make this generic the same way as for the other DOs.
* (bonus) Lays the groundwork for implementing a tlog-witness or tlog-mirror as a service that periodically updates based on a target log's latest checkpoint.
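A minimal sketch of what that Durable Object could look like using workers-rs alarms; nothing below exists in this commit (CleanupDO and the "cleaned_size" storage key are hypothetical), and the per-round work is elided:

use worker::*;

// Hypothetical per-log cleanup Durable Object; all names are illustrative.
#[durable_object]
pub struct CleanupDO {
    state: State,
}

#[durable_object]
impl DurableObject for CleanupDO {
    fn new(state: State, _env: Env) -> Self {
        Self { state }
    }

    // A request from the frontend kicks off cleanup by arming an immediate alarm.
    async fn fetch(&mut self, _req: Request) -> Result<Response> {
        self.state
            .storage()
            .set_alarm(std::time::Duration::from_millis(0))
            .await?;
        Response::ok("cleanup scheduled")
    }

    async fn alarm(&mut self) -> Result<Response> {
        // Saving/loading state is easy with DO storage.
        let cleaned: u64 = self.state.storage().get("cleaned_size").await.unwrap_or(0);
        // ... run one subrequest-budgeted cleanup round here ...
        self.state.storage().put("cleaned_size", cleaned).await?;
        // Re-arm with zero delay instead of waiting for the next cron minute.
        self.state
            .storage()
            .set_alarm(std::time::Duration::from_millis(0))
            .await?;
        Response::ok("")
    }
}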
1 parent 73b7438 commit c24e2bf

File tree

7 files changed: +278 −32 lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Generated file; diff not rendered.

crates/ct_worker/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -53,6 +53,7 @@ serde_with.workspace = true
 sha2.workspace = true
 static_ct_api.workspace = true
 signed_note.workspace = true
+thiserror.workspace = true
 tlog_tiles.workspace = true
 worker.workspace = true
 x509-verify.workspace = true

crates/ct_worker/src/lib.rs

Lines changed: 32 additions & 1 deletion
@@ -6,16 +6,19 @@
 use config::AppConfig;
 use ed25519_dalek::SigningKey as Ed25519SigningKey;
 use p256::{ecdsa::SigningKey as EcdsaSigningKey, pkcs8::DecodePrivateKey};
+use signed_note::KeyName;
+use static_ct_api::StaticCTCheckpointSigner;
 use std::collections::HashMap;
 use std::sync::{LazyLock, OnceLock};
-use tlog_tiles::{LookupKey, SequenceMetadata};
+use tlog_tiles::{CheckpointSigner, Ed25519CheckpointSigner, LookupKey, SequenceMetadata};
 #[allow(clippy::wildcard_imports)]
 use worker::*;
 use x509_util::CertPool;
 use x509_verify::x509_cert::Certificate;

 mod batcher_do;
 mod frontend_worker;
+mod partial_cleanup_cron;
 mod sequencer_do;

 // Application configuration.
@@ -72,3 +75,31 @@ pub(crate) fn load_witness_key(env: &Env, name: &str) -> Result<&'static Ed25519
         Ok(once.get_or_init(|| key))
     }
 }
+
+pub(crate) fn load_checkpoint_signers(env: &Env, name: &str) -> Vec<Box<dyn CheckpointSigner>> {
+    let origin = load_origin(name);
+    let signing_key = load_signing_key(env, name).unwrap().clone();
+    let witness_key = load_witness_key(env, name).unwrap().clone();
+
+    // Make the checkpoint signers from the secret keys and put them in a vec
+    let signer = StaticCTCheckpointSigner::new(origin.clone(), signing_key)
+        .map_err(|e| format!("could not create static-ct checkpoint signer: {e}"))
+        .unwrap();
+    let witness = Ed25519CheckpointSigner::new(origin, witness_key)
+        .map_err(|e| format!("could not create ed25519 checkpoint signer: {e}"))
+        .unwrap();
+
+    vec![Box::new(signer), Box::new(witness)]
+}
+
+pub(crate) fn load_origin(name: &str) -> KeyName {
+    KeyName::new(
+        CONFIG.logs[name]
+            .submission_url
+            .trim_start_matches("http://")
+            .trim_start_matches("https://")
+            .trim_end_matches('/')
+            .to_string(),
+    )
+    .expect("invalid origin name")
+}
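Per static-ct-api, the checkpoint origin must be the submission prefix of the log as a schema-less URL with no trailing slashes, which is exactly what load_origin derives from the config. For example, a hypothetical submission_url of "https://ct.example.com/logs/dev2025h2/" becomes the origin "ct.example.com/logs/dev2025h2".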
crates/ct_worker/src/partial_cleanup_cron.rs

Lines changed: 231 additions & 0 deletions
@@ -0,0 +1,231 @@
+use futures_util::future::join_all;
+use generic_log_worker::{load_public_bucket, log_ops::CHECKPOINT_KEY, util::now_millis};
+use signed_note::{KeyName, NoteVerifier, VerifierList};
+use static_ct_api::StaticCTPendingLogEntry;
+use tlog_tiles::{PendingLogEntry, TlogTile};
+use worker::{event, Bucket, Env, ScheduleContext, ScheduledEvent};
+
+use crate::{load_checkpoint_signers, load_origin, CONFIG};
+
+// Workers are limited to 1000 subrequests per invocation (including R2 operations).
+// For each log, we'll need to perform the following subrequests:
+// - Get old and new log sizes (2 ops)
+// - List partials for full tree, data, and (optional) aux tiles (2-3 ops per 256 entries, plus logarithmic level-1+ tree tiles)
+// - Delete partials for full tree, data, and (optional) aux tiles (0-3 ops per 256 entries, after <https://github.com/cloudflare/workers-rs/issues/780>)
+// - Save new tree size (1 op)
+// We track subrequests to avoid going over the limit, but can still limit the range of entries.
+const SUBREQUEST_LIMIT: usize = 1000;
+const STEP: u64 = TlogTile::FULL_WIDTH as u64;
+const CLEANED_SIZE_KEY: &str = "_cleanup_cron_progress";
+
+#[derive(thiserror::Error, Debug)]
+enum CleanupError {
+    #[error(transparent)]
+    Worker(#[from] worker::Error),
+    #[error("subrequest limit")]
+    Subrequests,
+}
+
+/// Partial tile cleanup cron job periodically does the following:
+///
+/// for each configured log:
+/// 1. set new_size to the current (verified) checkpoint size
+/// 2. set old_size to the checkpoint size when the cron job previously successfully ran
+/// 3. get the list of tiles created between old_size and new_size (via `TlogTile::new_tiles(old_size, new_size)`)
+/// 4. for each full tile:
+///    a. list the corresponding partial tiles (matching the prefix "<full tile key>.p/")
+///    b. delete the partial tiles
+#[event(scheduled)]
+pub async fn scheduled(_event: ScheduledEvent, env: Env, _ctx: ScheduleContext) {
+    let mut subrequests = 0;
+    for name in CONFIG.logs.keys() {
+        if checked_add_subrequests(&mut subrequests, 3).is_err() {
+            // We need three subrequests to check and set the log size. If we've
+            // already reached the subrequest limit, stop now.
+            return;
+        }
+
+        let origin = &load_origin(name);
+        let verifiers = &VerifierList::new(
+            load_checkpoint_signers(&env, name)
+                .iter()
+                .map(|s| s.verifier())
+                .collect::<Vec<Box<dyn NoteVerifier>>>(),
+        );
+        let bucket = &load_public_bucket(&env, name).unwrap();
+        let current_log_size = current_log_size(origin, verifiers, bucket).await.unwrap();
+        let old_cleaned_size = cleaned_size(bucket).await.unwrap();
+        log::debug!("cleaning {name}: {old_cleaned_size} to {current_log_size}");
+        match clean_log(old_cleaned_size, current_log_size, bucket, &mut subrequests).await {
+            Ok(cleaned_size) => {
+                // Save progress on cleaning the log.
+                if cleaned_size > old_cleaned_size {
+                    let _ = set_cleaned_size(cleaned_size, bucket)
+                        .await
+                        .inspect_err(|e| log::warn!("failed to update cleaned size: {name}: {e}"));
+                }
+            }
+            Err(e) => log::warn!("failed to clean log: {name}: {e}"),
+        }
+    }
+}
+
+// Clean up partial tiles from a log, stopping either when the current log size
+// is reached or the subrequest limit is reached. Returns the size of the tree
+// that has been cleaned so partial progress can be saved.
+async fn clean_log(
+    old_size: u64,
+    new_size: u64,
+    bucket: &Bucket,
+    subrequests: &mut usize,
+) -> Result<u64, CleanupError> {
+    let mut cleaned_size = old_size;
+    loop {
+        if cleaned_size + STEP > new_size {
+            // We've already cleaned the last full tile, so nothing else to do.
+            break;
+        }
+        match clean_log_range(cleaned_size, cleaned_size + STEP, subrequests, bucket).await {
+            Ok(()) => cleaned_size += STEP,
+            Err(e) => {
+                return match e {
+                    CleanupError::Subrequests => Ok(cleaned_size),
+                    CleanupError::Worker(_) => Err(e),
+                }
+            }
+        }
+    }
+    Ok(cleaned_size)
+}
+
+// Attempt to clean up all partial tiles within the specified range. Any failure
+// will require the full range to be retried later.
+//
+// # Errors
+// Will return `CleanupError::Subrequests` if the operation cannot be completed
+// because it would run into subrequest limits, and will return a
+// `CleanupError::Worker` if any other error occurs.
+async fn clean_log_range(
+    start_size: u64,
+    end_size: u64,
+    subrequests: &mut usize,
+    bucket: &Bucket,
+) -> Result<(), CleanupError> {
+    // Get tree tiles between the start and end sizes.
+    for tile in TlogTile::new_tiles(start_size, end_size) {
+        // Full tiles only. If the full tile exists, the corresponding partial tiles can be deleted.
+        if tile.width() == 1 << tile.height() {
+            if tile.level() == 0 {
+                // For level-0 tree tiles, delete the corresponding data and (optional) aux files.
+                delete_dir(
+                    &format!(
+                        "{}.p/",
+                        tile.with_data_path(StaticCTPendingLogEntry::DATA_TILE_PATH)
+                            .path()
+                    ),
+                    bucket,
+                    subrequests,
+                )
+                .await?;
+                if let Some(aux_path) = StaticCTPendingLogEntry::AUX_TILE_PATH {
+                    delete_dir(
+                        &format!("{}.p/", tile.with_data_path(aux_path).path()),
+                        bucket,
+                        subrequests,
+                    )
+                    .await?;
+                }
+            }
+            delete_dir(&format!("{}.p/", tile.path()), bucket, subrequests).await?;
+        }
+    }
+    Ok(())
+}
+
+// Delete all files in the specified directory.
+//
+// # Errors
+// Will return `CleanupError::Subrequests` and abort early if the subrequest
+// limit is reached before successfully deleting the directory, and will return
+// a `CleanupError::Worker` if any other error occurs.
+async fn delete_dir(
+    prefix: &str,
+    bucket: &Bucket,
+    subrequests: &mut usize,
+) -> Result<(), CleanupError> {
+    log::debug!("deleting {prefix}");
+    checked_add_subrequests(subrequests, 1)?;
+    let objects = bucket.list().prefix(prefix).execute().await?;
+    // TODO add binding to delete multiple keys from R2 bucket. Otherwise, we'll
+    // quickly hit workers subrequest limits.
+    // Tracking issue: <https://github.com/cloudflare/workers-rs/issues/780>
+    checked_add_subrequests(subrequests, objects.objects().len())?;
+    let futures = objects
+        .objects()
+        .iter()
+        .map(|obj| bucket.delete(obj.key()))
+        .collect::<Vec<_>>();
+    join_all(futures)
+        .await
+        .into_iter()
+        .collect::<Result<Vec<_>, worker::Error>>()?;
+    Ok(())
+}
+
+async fn cleaned_size(bucket: &Bucket) -> Result<u64, worker::Error> {
+    Ok(match bucket.get(CLEANED_SIZE_KEY).execute().await? {
+        Some(obj) => u64::from_be_bytes(
+            obj.body()
+                .ok_or("missing object body")?
+                .bytes()
+                .await?
+                .try_into()
+                .map_err(|_| "failed to read u64")?,
+        ),
+        None => 0,
+    })
+}
+
+async fn set_cleaned_size(size: u64, bucket: &Bucket) -> Result<(), worker::Error> {
+    bucket
+        .put(CLEANED_SIZE_KEY, size.to_be_bytes().to_vec())
+        .execute()
+        .await
+        .map(|_| ())
+}
+
+async fn current_log_size(
+    origin: &KeyName,
+    verifiers: &VerifierList,
+    bucket: &Bucket,
+) -> Result<u64, worker::Error> {
+    let checkpoint_bytes = bucket
+        .get(CHECKPOINT_KEY)
+        .execute()
+        .await?
+        .ok_or("failed to retrieve checkpoint from object storage")?
+        .body()
+        .ok_or("missing object body")?
+        .bytes()
+        .await?;
+    let checkpoint =
+        tlog_tiles::open_checkpoint(origin.as_str(), verifiers, now_millis(), &checkpoint_bytes)
+            .map_err(|e| e.to_string())?
+            .0;
+
+    Ok(checkpoint.size())
+}
+
+// Add to the subrequest count after checking that the new subrequests will not
+// put the worker over the limit.
+//
+// # Errors
+// Will return `CleanupError::Subrequests` if the additional subrequests would
+// cause the limit to be exceeded.
+fn checked_add_subrequests(subrequests: &mut usize, new: usize) -> Result<(), CleanupError> {
+    if *subrequests + new > SUBREQUEST_LIMIT {
+        return Err(CleanupError::Subrequests);
+    }
+    *subrequests += new;
+    Ok(())
+}
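For orientation, the ".p/" prefixes passed to delete_dir follow the tlog-tiles/static-ct-api path layout: partial tiles live under the full tile's path plus a ".p/" suffix, keyed by width. For the first 256-entry step of a log, the deleted prefixes would look roughly like this (illustrative keys; the exact strings come from TlogTile::path and with_data_path):

tile/0/000.p/      # partials (e.g. tile/0/000.p/128) of the full level-0 tree tile for entries [0, 256)
tile/data/000.p/   # partials of the corresponding data tile

Once the full tiles tile/0/000 and tile/data/000 exist, everything under these prefixes is redundant and safe to delete.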

crates/ct_worker/src/sequencer_do.rs

Lines changed: 4 additions & 29 deletions
@@ -5,12 +5,10 @@
 
 use std::time::Duration;
 
-use crate::{load_signing_key, load_witness_key, CONFIG};
+use crate::{load_checkpoint_signers, load_origin, CONFIG};
 use generic_log_worker::{load_public_bucket, GenericSequencer, SequencerConfig};
 use prometheus::Registry;
-use signed_note::KeyName;
-use static_ct_api::{StaticCTCheckpointSigner, StaticCTLogEntry};
-use tlog_tiles::{CheckpointSigner, Ed25519CheckpointSigner};
+use static_ct_api::StaticCTLogEntry;
 #[allow(clippy::wildcard_imports)]
 use worker::*;
 
@@ -30,36 +28,13 @@ impl DurableObject for Sequencer {
             .find(|(name, _)| id == namespace.id_from_name(name).unwrap().to_string())
             .expect("unable to find sequencer name");
 
-        // https://github.com/C2SP/C2SP/blob/main/static-ct-api.md#checkpoints
-        // The origin line MUST be the submission prefix of the log as a schema-less URL with no trailing slashes.
-        let origin = KeyName::new(
-            params
-                .submission_url
-                .trim_start_matches("http://")
-                .trim_start_matches("https://")
-                .trim_end_matches('/')
-                .to_string(),
-        )
-        .expect("invalid origin name");
+        let origin = load_origin(name);
         let sequence_interval = Duration::from_millis(params.sequence_interval_millis);
 
         // We don't use checkpoint extensions for CT
         let checkpoint_extension = Box::new(|_| vec![]);
 
-        let checkpoint_signers: Vec<Box<dyn CheckpointSigner>> = {
-            let signing_key = load_signing_key(&env, name).unwrap().clone();
-            let witness_key = load_witness_key(&env, name).unwrap().clone();
-
-            // Make the checkpoint signers from the secret keys and put them in a vec
-            let signer = StaticCTCheckpointSigner::new(origin.clone(), signing_key)
-                .map_err(|e| format!("could not create static-ct checkpoint signer: {e}"))
-                .unwrap();
-            let witness = Ed25519CheckpointSigner::new(origin.clone(), witness_key)
-                .map_err(|e| format!("could not create ed25519 checkpoint signer: {e}"))
-                .unwrap();
-
-            vec![Box::new(signer), Box::new(witness)]
-        };
+        let checkpoint_signers = load_checkpoint_signers(&env, name);
         let bucket = load_public_bucket(&env, name).unwrap();
         let registry = Registry::new();
 
crates/ct_worker/wrangler.jsonc

Lines changed: 6 additions & 1 deletion
@@ -55,7 +55,12 @@
         "Batcher"
       ]
     }
-  ]
+  ],
+  "triggers": {
+    "crons": [
+      "* * * * *"
+    ]
+  }
 },
 "cftest": {
   "build": {

crates/generic_log_worker/src/log_ops.rs

Lines changed: 3 additions & 1 deletion
@@ -50,7 +50,9 @@ use worker::Error as WorkerError;
 const DATA_TILE_LEVEL_KEY: u8 = u8::MAX;
 /// Same as above, anything above 63 is fine to use as the level key.
 const UNHASHED_TILE_LEVEL_KEY: u8 = u8::MAX - 1;
-const CHECKPOINT_KEY: &str = "checkpoint";
+/// Path used to store checkpoints, both in the object storage and lock backends.
+pub const CHECKPOINT_KEY: &str = "checkpoint";
+/// Path used to store staging bundles in the lock backend.
 const STAGING_KEY: &str = "staging";
 
 // Limit on the number of entries per batch. Tune this parameter to avoid
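Making CHECKPOINT_KEY pub lets the new cleanup cron job read the checkpoint from the public bucket (see current_log_size above) and verify it before trusting its size.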
