
Commit 6e38087

feat(scripts): add witness caching for multi and cost_estimator (#776)
* feat(witness-cache): add caching for witness generation

Add witness caching to reduce 2-hour bottleneck in multi.rs and cost_estimator scripts. Cache WitnessData (not SP1Stdin) to disk with keys (chain_id, start_block, end_block).

New features:
- Witness cache module with save/load functions using rkyv serialization
- cfg_if conditional compilation for EigenDA support
- Three cache flags: --cache (default), --use-cache (load only), --save-cache (force regenerate)
- Multi-script integration with cache status messages
- Cost estimator parallel cache loads per batch range
- Comprehensive documentation on cache usage and management

Cache location: data/{chain_id}/witness-cache/{start_block}-{end_block}.bin
DA compatibility: Ethereum/Celestia compatible, EigenDA separate cache

Files changed:
- utils/host/src/witness_cache.rs (NEW)
- utils/host/src/lib.rs, Cargo.toml
- scripts/utils/src/lib.rs
- scripts/prove/bin/multi.rs
- scripts/utils/bin/cost_estimator.rs
- book/advanced/cost-estimation-tools.md (renamed from cost-estimator.md)
- book/SUMMARY.md

* fix(witness-cache): use SP1Stdin caching for cost_estimator to fix CI

- Add SP1Stdin cache functions using bincode (DA-agnostic)
- cost_estimator.rs now caches SP1Stdin instead of WitnessData
- Remove WitnessDataType type constraint that caused CI failures
- Fix race condition in multi.rs by using match pattern with graceful fallback

SP1Stdin is the same type regardless of which DA witness generator produced it, so it works with generic host types. This fixes the CI type mismatch error when running with --features celestia/eigenda.

* refactor(witness-cache): unify on SP1Stdin caching for DA-agnosticism

Switch both multi.rs and cost_estimator.rs to use SP1Stdin caching instead of WitnessData caching. This fixes CI failures when running with different DA feature flags (celestia, eigenda).

SP1Stdin is DA-agnostic: it's the same type regardless of which witness generator produced it. This means cache files now work across all DA types (Ethereum, Celestia, EigenDA).

Changes:
- Update multi.rs to cache SP1Stdin using bincode
- Simplify witness_cache.rs to only contain SP1Stdin functions
- Remove eigenda feature flag from utils/host (no longer needed)
- Update documentation to reflect SP1Stdin caching

* refactor(witness-cache): address PR review comments

- Use tracing macros instead of println/eprintln in multi.rs
- Fix DA compatibility docs: clarify that cache files are compatible between Ethereum ↔ Celestia, but NOT with EigenDA
- Simplify cache flags: remove --use-cache and --save-cache, keep only --cache for simpler UX

(cherry picked from commit 7bfa643)
1 parent ca33cee commit 6e38087

File tree

10 files changed: +289 / -74 lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.

book/SUMMARY.md

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@
   - [EigenDA DA](./fault_proofs/experimental/eigenda.md)
 
 - [Advanced](./advanced/intro.md)
-  - [Cost Estimator](./advanced/cost-estimator.md)
+  - [Cost Estimation Tools](./advanced/cost-estimation-tools.md)
   - [Reproduce Binaries](./advanced/verify-binaries.md)
   - [Node Setup](./advanced/node-setup.md)
 - [FAQ](./faq.md)
book/advanced/cost-estimation-tools.md

Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
# Cost Estimation Tools

This guide covers the scripts for estimating proving costs and testing execution: `multi` and `cost-estimator`.

## Setup

Before running these scripts, set up a `.env` file in the project root:

```bash
L1_RPC=<YOUR_L1_RPC_ENDPOINT>
L2_RPC=<YOUR_L2_RPC_ENDPOINT>
L2_NODE_RPC=<YOUR_L2_NODE_RPC_ENDPOINT>
```

## Multi Script

The `multi` script executes the OP Succinct range proof program for a block range. Use it to test proof generation or to generate actual proofs.

### Usage

```bash
# Execute without proving (generates an execution report)
cargo run --bin multi -- --start 1000 --end 1020

# Generate compressed proofs
cargo run --bin multi -- --start 1000 --end 1020 --prove
```

### Output

- **Execution mode**: Prints execution stats and saves them to `execution-reports/multi/{chain_id}/{start}-{end}.csv`
- **Prove mode**: Saves the proof to `data/{chain_id}/proofs/{start}-{end}.bin`

## Cost Estimator

The `cost-estimator` estimates proving costs without generating proofs. It splits large ranges into batches and runs them in parallel.

### Usage

```bash
cargo run --bin cost-estimator -- \
  --start 2000000 \
  --end 2001800 \
  --batch-size 300
```

With `--batch-size 300`, the 1,800-block range above is split into six batches that execute in parallel. For the most accurate estimate, use a range larger than the batcher interval and set the batch size equal to the range.

### Output

The execution report is saved to `execution-reports/{chain_id}/{start}-{end}-report.csv` and includes metrics such as:
- Total instruction count
- Oracle verification / derivation / block execution costs
- SP1 gas usage
- Transaction counts and EVM gas
- Precompile cycles (BN pair, add, mul, KZG eval, etc.)
## Witness Caching

Both scripts support witness caching to skip the time-consuming witness generation step on subsequent runs.

### Why Cache?

The proving pipeline has two stages:

```
host.run() → WitnessData → get_sp1_stdin() → SP1Stdin
  [hours]                  [milliseconds]
```

Witness generation (`host.run()`) fetches L1/L2 data and executes blocks, which can take **hours** for large ranges. Caching avoids repeating this step by saving the resulting `SP1Stdin` to disk.

We cache `SP1Stdin` because:
1. It skips the hours-long `host.run()` bottleneck
2. `SP1Stdin` implements serde serialization via bincode
3. The cache is compatible across Ethereum and Celestia DA (both use the same witness format)

Note: The `get_sp1_stdin()` conversion takes milliseconds, so caching after this step adds negligible overhead.
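Both scripts follow the same load-or-generate pattern around the two helpers in `op_succinct_host_utils::witness_cache`. The sketch below is illustrative only: the `stdin_for_range` wrapper and its `generate` closure are not part of the codebase, and the helper signatures (a cache miss returns `Ok(None)`, a save returns the written path) are inferred from how `multi.rs` calls them in this commit.

```rust
use anyhow::Result;
use op_succinct_host_utils::witness_cache::{load_stdin_from_cache, save_stdin_to_cache};
use sp1_sdk::SP1Stdin;
use std::future::Future;

/// Load SP1Stdin from the cache if present; otherwise generate it and persist it.
/// `generate` stands in for the expensive host.run() + get_sp1_stdin() path.
async fn stdin_for_range<F, Fut>(
    chain_id: u64,
    start: u64,
    end: u64,
    cache: bool,
    generate: F,
) -> Result<SP1Stdin>
where
    F: FnOnce() -> Fut,
    Fut: Future<Output = Result<SP1Stdin>>,
{
    if cache {
        // Cache hit: skip the hours-long witness generation entirely.
        if let Ok(Some(stdin)) = load_stdin_from_cache(chain_id, start, end) {
            return Ok(stdin);
        }
    }
    // Cache miss (or caching disabled): generate, then persist for the next run.
    let stdin = generate().await?;
    if cache {
        let path = save_stdin_to_cache(chain_id, start, end, &stdin)?;
        println!("Saved stdin to cache: {}", path.display());
    }
    Ok(stdin)
}
```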
### Cache Flag

Use `--cache` to enable caching. If a cache file exists for the block range, it will be loaded. Otherwise, witness generation runs and the result is saved to cache.

### Examples

```bash
# First run: generates witness and saves to cache
cargo run --bin multi -- --start 1000 --end 1020 --cache

# Second run: loads from cache (instant), then proves
cargo run --bin multi -- --start 1000 --end 1020 --cache --prove

# Force regenerate by deleting cache first
rm data/{chain_id}/witness-cache/1000-1020-stdin.bin
cargo run --bin multi -- --start 1000 --end 1020 --cache

# Cost estimator with caching
cargo run --bin cost-estimator -- --start 1000 --end 1100 --batch-size 10 --cache
```

### Cache Location

```
data/{chain_id}/witness-cache/{start_block}-{end_block}-stdin.bin
```

Example: `data/8453/witness-cache/1000-1020-stdin.bin` for Base.
### DA Compatibility

| DA Type | Compatible With |
|---------|-----------------|
| Ethereum (default) | Celestia |
| Celestia | Ethereum |
| EigenDA | EigenDA only |

Cache files are compatible between Ethereum and Celestia (both use `DefaultWitnessData`), but **not** with EigenDA (uses `EigenDAWitnessData`). Don't mix cache files across incompatible DA types.

### Cache Management

```bash
# Clear all cache for a chain
rm -rf data/{chain_id}/witness-cache/

# Clear specific range
rm data/{chain_id}/witness-cache/{start}-{end}-stdin.bin
```

Cache files are typically 100MB-1GB per range.
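To check how much disk the cache is using before clearing it, standard shell tools are enough (the Base chain ID `8453` below is just an example):

```bash
# Total cache size and per-range cache files for one chain
du -sh data/8453/witness-cache/
ls -lh data/8453/witness-cache/
```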

book/advanced/cost-estimator.md

Lines changed: 0 additions & 53 deletions
This file was deleted.

scripts/prove/bin/multi.rs

Lines changed: 51 additions & 14 deletions
@@ -1,15 +1,23 @@
 use anyhow::{Context, Result};
 use clap::Parser;
 use op_succinct_host_utils::{
-    block_range::get_validated_block_range, fetcher::OPSuccinctDataFetcher, host::OPSuccinctHost,
-    stats::ExecutionStats, witness_generation::WitnessGenerator,
+    block_range::get_validated_block_range,
+    fetcher::OPSuccinctDataFetcher,
+    host::OPSuccinctHost,
+    stats::ExecutionStats,
+    witness_cache::{load_stdin_from_cache, save_stdin_to_cache},
+    witness_generation::WitnessGenerator,
 };
 use op_succinct_proof_utils::{get_range_elf_embedded, initialize_host};
 use op_succinct_prove::execute_multi;
 use op_succinct_scripts::HostExecutorArgs;
 use sp1_sdk::{utils, ProverClient};
-use std::{fs, sync::Arc, time::Instant};
-use tracing::debug;
+use std::{
+    fs,
+    sync::Arc,
+    time::{Duration, Instant},
+};
+use tracing::{debug, info, warn};
 
 /// Execute the OP Succinct program for multiple blocks.
 #[tokio::main]
@@ -35,16 +43,47 @@ async fn main() -> Result<()> {
     )
     .await?;
 
-    let host_args = host.fetch(l2_start_block, l2_end_block, None, args.safe_db_fallback).await?;
+    let l2_chain_id = data_fetcher.get_l2_chain_id().await?;
+
+    // Helper closure to generate stdin (runs witness generation and converts to SP1Stdin)
+    let generate_stdin = || async {
+        let host_args =
+            host.fetch(l2_start_block, l2_end_block, None, args.safe_db_fallback).await?;
+        debug!("Host args: {:?}", host_args);
 
-    debug!("Host args: {:?}", host_args);
+        let start_time = Instant::now();
+        let witness = host.run(&host_args).await?;
+        let duration = start_time.elapsed();
 
-    let start_time = Instant::now();
-    let witness_data = host.run(&host_args).await?;
-    let witness_generation_duration = start_time.elapsed();
+        // Convert witness to SP1Stdin
+        let stdin = host.witness_generator().get_sp1_stdin(witness)?;
 
-    // Get the stdin for the block.
-    let sp1_stdin = host.witness_generator().get_sp1_stdin(witness_data)?;
+        // Save to cache if enabled
+        if args.cache {
+            let cache_path =
+                save_stdin_to_cache(l2_chain_id, l2_start_block, l2_end_block, &stdin)?;
+            info!("Saved stdin to cache: {}", cache_path.display());
+        }
+
+        Ok::<_, anyhow::Error>((stdin, duration))
+    };
+
+    // Check cache first if enabled (with graceful fallback)
+    let (sp1_stdin, witness_generation_duration) = if args.cache {
+        match load_stdin_from_cache(l2_chain_id, l2_start_block, l2_end_block) {
+            Ok(Some(stdin)) => {
+                info!("Loaded stdin from cache");
+                (stdin, Duration::ZERO)
+            }
+            Ok(None) => generate_stdin().await?,
+            Err(e) => {
+                warn!("Failed to load cache: {e}, regenerating...");
+                generate_stdin().await?
+            }
+        }
+    } else {
+        generate_stdin().await?
+    };
 
     let prover = ProverClient::from_env();
 
@@ -55,7 +94,7 @@ async fn main() -> Result<()> {
         let proof = prover.prove(&pk, &sp1_stdin).compressed().run().unwrap();
 
         // Create a proof directory for the chain ID if it doesn't exist.
-        let proof_dir = format!("data/{}/proofs", data_fetcher.get_l2_chain_id().await.unwrap());
+        let proof_dir = format!("data/{}/proofs", l2_chain_id);
         if !std::path::Path::new(&proof_dir).exists() {
             fs::create_dir_all(&proof_dir).unwrap();
         }
@@ -64,8 +103,6 @@ async fn main() -> Result<()> {
             .save(format!("{proof_dir}/{l2_start_block}-{l2_end_block}.bin"))
             .expect("saving proof failed");
     } else {
-        let l2_chain_id = data_fetcher.get_l2_chain_id().await?;
-
         let (block_data, report, execution_duration) =
             execute_multi(&data_fetcher, sp1_stdin, l2_start_block, l2_end_block).await?;
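The new `utils/host/src/witness_cache.rs` module itself is not rendered in this view. A minimal sketch of what its two helpers could look like, consistent with the calls above and with the bincode-based `SP1Stdin` caching described in the commit message (the actual module may differ in details):

```rust
use std::{
    fs,
    path::{Path, PathBuf},
};

use anyhow::{Context, Result};
use sp1_sdk::SP1Stdin;

/// Path of the cache file for a given chain and block range.
fn cache_path(chain_id: u64, start: u64, end: u64) -> PathBuf {
    PathBuf::from(format!("data/{chain_id}/witness-cache/{start}-{end}-stdin.bin"))
}

/// Serialize the SP1Stdin with bincode and write it under data/{chain_id}/witness-cache/.
pub fn save_stdin_to_cache(
    chain_id: u64,
    start: u64,
    end: u64,
    stdin: &SP1Stdin,
) -> Result<PathBuf> {
    let path = cache_path(chain_id, start, end);
    if let Some(dir) = path.parent() {
        fs::create_dir_all(dir).context("failed to create witness cache directory")?;
    }
    let bytes = bincode::serialize(stdin).context("failed to serialize SP1Stdin")?;
    fs::write(&path, bytes).context("failed to write witness cache file")?;
    Ok(path)
}

/// Load a cached SP1Stdin, returning Ok(None) when no cache file exists for the range.
pub fn load_stdin_from_cache(chain_id: u64, start: u64, end: u64) -> Result<Option<SP1Stdin>> {
    let path = cache_path(chain_id, start, end);
    if !Path::new(&path).exists() {
        return Ok(None);
    }
    let bytes = fs::read(&path).context("failed to read witness cache file")?;
    let stdin = bincode::deserialize(&bytes).context("failed to deserialize SP1Stdin")?;
    Ok(Some(stdin))
}
```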

scripts/utils/bin/cost_estimator.rs

Lines changed: 36 additions & 4 deletions
@@ -10,6 +10,7 @@ use op_succinct_host_utils::{
     fetcher::OPSuccinctDataFetcher,
     host::OPSuccinctHost,
     stats::ExecutionStats,
+    witness_cache::{load_stdin_from_cache, save_stdin_to_cache},
     witness_generation::WitnessGenerator,
 };
 use op_succinct_proof_utils::{get_range_elf_embedded, initialize_host};
@@ -26,14 +27,18 @@ use std::{
 
 /// Run the zkVM execution process for each split range in parallel. Writes the execution stats for
 /// each block range to a CSV file after each execution completes (not guaranteed to be in order).
-async fn execute_blocks_and_write_stats_csv<H: OPSuccinctHost>(
+async fn execute_blocks_and_write_stats_csv<H>(
     host: Arc<H>,
     host_args: &[H::Args],
     ranges: Vec<SpanBatchRange>,
     l2_chain_id: u64,
    start: u64,
     end: u64,
-) -> Result<()> {
+    cache_enabled: bool,
+) -> Result<()>
+where
+    H: OPSuccinctHost,
+{
     let data_fetcher = OPSuccinctDataFetcher::new_with_rollup_config().await?;
 
     // Fetch all of the execution stats block ranges in parallel.
@@ -67,12 +72,38 @@ async fn execute_blocks_and_write_stats_csv<H: OPSuccinctHost>(
     let prover = ProverClient::builder().cpu().build();
 
     // Run the host tasks in parallel using join_all
-    let handles = host_args.iter().map(|host_args| {
+    let handles = host_args.iter().zip(ranges.iter()).map(|(host_args, range)| {
         let host_args = host_args.clone();
         let host = host.clone();
+        let start = range.start;
+        let end = range.end;
         tokio::spawn(async move {
+            // Try loading SP1Stdin from cache
+            if cache_enabled {
+                match load_stdin_from_cache(l2_chain_id, start, end) {
+                    Ok(Some(stdin)) => {
+                        info!("Loaded stdin from cache for range {}-{}", start, end);
+                        return stdin;
+                    }
+                    Ok(None) => {} // No cache, generate below
+                    Err(e) => {
+                        log::warn!("Failed to load stdin cache for range {}-{}: {e}", start, end);
+                    }
+                }
+            }
+
+            // Generate witness and convert to SP1Stdin
             let witness_data = host.run(&host_args).await.unwrap();
-            host.witness_generator().get_sp1_stdin(witness_data).unwrap()
+            let stdin = host.witness_generator().get_sp1_stdin(witness_data).unwrap();
+
+            // Save SP1Stdin to cache
+            if cache_enabled {
+                if let Ok(cache_path) = save_stdin_to_cache(l2_chain_id, start, end, &stdin) {
+                    info!("Saved stdin to cache: {}", cache_path.display());
+                }
+            }
+
+            stdin
         })
     });
 
@@ -238,6 +269,7 @@
         l2_chain_id,
         l2_start_block,
         l2_end_block,
+        args.cache,
     )
     .await?;
 

scripts/utils/src/lib.rs

Lines changed: 2 additions & 2 deletions
@@ -15,9 +15,9 @@ pub struct HostExecutorArgs {
     /// The number of blocks to execute in a single batch.
     #[arg(long, default_value = "10")]
     pub batch_size: u64,
-    /// Use cached witness generation.
+    /// Enable caching: load from cache if available, save to cache if not.
     #[arg(long)]
-    pub use_cache: bool,
+    pub cache: bool,
     /// Use a fixed recent range.
     #[arg(long)]
     pub rolling: bool,
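With clap's derive API, a `bool` field annotated with `#[arg(long)]` becomes a presence flag, so this rename is what turns `--use-cache` into `--cache` on the command line. A trimmed, standalone sketch (one field only, not the real `HostExecutorArgs`):

```rust
use clap::Parser;

/// Illustration of the renamed flag; the real struct carries many more fields.
#[derive(Parser, Debug)]
struct Args {
    /// Enable caching: load from cache if available, save to cache if not.
    #[arg(long)]
    cache: bool,
}

fn main() {
    // Passing `--cache` sets the field to true; omitting it leaves it false.
    let args = Args::parse();
    println!("cache enabled: {}", args.cache);
}
```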

utils/host/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ alloy-sol-types.workspace = true
 # general
 anyhow.workspace = true
 async-trait.workspace = true
+bincode.workspace = true
 cfg-if.workspace = true
 c-kzg.workspace = true
 futures.workspace = true
