Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/workflows/forest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,28 @@ jobs:
- name: Snapshot export check v2
run: ./scripts/tests/calibnet_export_f3_check.sh
timeout-minutes: ${{ fromJSON(env.SCRIPT_TIMEOUT_MINUTES) }}
  # Verifies the diff snapshot export feature: the test script exports the
  # same diff snapshot via `forest-cli snapshot export-diff` and via
  # `forest-tool archive export`, then compares the two outputs.
  calibnet-export-diff-check:
    needs:
      - build-ubuntu
    name: Diff snapshot export checks
    runs-on: ubuntu-24.04
    steps:
      # Log CPU details to aid debugging of performance-sensitive failures.
      - run: lscpu
      # Reuse cached Filecoin proof parameters between workflow runs.
      - uses: actions/cache@v4
        with:
          path: "${{ env.FIL_PROOFS_PARAMETER_CACHE }}"
          key: proof-params-keys
      - uses: actions/checkout@v4
      # Fetch the forest binaries produced by the `build-ubuntu` job.
      - uses: actions/download-artifact@v5
        with:
          name: "forest-${{ runner.os }}"
          path: ~/.cargo/bin
      # Downloaded artifacts lose their executable bit; restore it.
      - name: Set permissions
        run: |
          chmod +x ~/.cargo/bin/forest*
      - name: Diff snapshot export check
        run: ./scripts/tests/calibnet_export_diff_check.sh
        timeout-minutes: ${{ fromJSON(env.SCRIPT_TIMEOUT_MINUTES) }}
calibnet-no-discovery-checks:
needs:
- build-ubuntu
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

### Added

- [#6074](https://github.com/ChainSafe/forest/issues/6074) Added `forest-cli snapshot export-diff` subcommand for exporting a diff snapshot.

### Changed

### Removed
Expand Down
25 changes: 25 additions & 0 deletions scripts/tests/calibnet_export_diff_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Checks the correctness of the diff snapshot export feature by exporting the
# same diff snapshot twice — once with `forest-cli snapshot export-diff` and
# once with `forest-tool archive export` — and verifying the outputs match.
# It requires both the `forest` and `forest-cli` binaries to be in the PATH.

set -euo pipefail

source "$(dirname "$0")/harness.sh"

forest_init "$@"

db_path=$($FOREST_TOOL_PATH db stats --chain calibnet | grep "Database path:" | cut -d':' -f2- | xargs)

# Pick the newest snapshot by modification time (a bare glob piped to
# `tail` would select lexicographically and break on an empty directory).
snapshot=$(find "$db_path/car_db" -name '*.car.zst' -printf '%T@ %p\n' | sort -n | tail -n 1 | cut -d' ' -f2-)
if [[ -z "$snapshot" ]]; then
  echo "Error: no *.car.zst snapshots found under $db_path/car_db" >&2
  exit 1
fi
snapshot_epoch=$(forest_query_epoch "$snapshot")

echo "Exporting diff snapshot @ $snapshot_epoch with forest-cli snapshot export-diff"
$FOREST_CLI_PATH snapshot export-diff --from "$snapshot_epoch" --to "$((snapshot_epoch - 900))" -d 900 -o diff1

$FOREST_CLI_PATH shutdown --force

echo "Exporting diff snapshot @ $snapshot_epoch with forest-tool archive export"
$FOREST_TOOL_PATH archive export --epoch "$snapshot_epoch" --depth 900 --diff "$((snapshot_epoch - 900))" --diff-depth 900 -o diff2 "$snapshot"

# Byte-for-byte comparison of the two exported diff snapshots.
echo "Comparing diff snapshots"
diff diff1 diff2
Comment on lines +1 to +25
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Harden the script (pipefail, robust snapshot selection, and deterministic compare).

Current use of globbed find and plain diff is brittle. Add pipefail, fail fast if no snapshot, and pick the newest snapshot deterministically. Also prefer comparing normalized metadata to avoid false negatives from compression framing.

Apply:

@@
-set -eu
+set -euo pipefail
@@
-db_path=$($FOREST_TOOL_PATH db stats --chain calibnet | grep "Database path:" | cut -d':' -f2- | xargs)
-snapshot=$(find "$db_path/car_db"/*.car.zst | tail -n 1)
-snapshot_epoch=$(forest_query_epoch "$snapshot")
+db_path="$(
+  $FOREST_TOOL_PATH db stats --chain calibnet \
+    | grep -E "^Database path:" \
+    | cut -d':' -f2- \
+    | xargs
+)"
+# Pick latest snapshot by mtime; fail if none.
+snapshot="$(ls -1t "$db_path"/car_db/*.car.zst 2>/dev/null | head -n1 || true)"
+if [[ -z "${snapshot:-}" ]]; then
+  echo "Error: no *.car.zst snapshots found under: $db_path/car_db" >&2
+  exit 1
+fi
+snapshot_epoch="$(forest_query_epoch "$snapshot")"
@@
-echo "Exporting diff snapshot @ $snapshot_epoch with forest-cli snapshot export-diff"
-$FOREST_CLI_PATH snapshot export-diff --from "$snapshot_epoch" --to "$((snapshot_epoch - 900))" -d 900 -o diff1
+DEPTH="${DEPTH:-900}"
+TO_EPOCH="$((snapshot_epoch - DEPTH))"
+echo "Exporting diff snapshot @ $snapshot_epoch with forest-cli snapshot export-diff (to=$TO_EPOCH depth=$DEPTH)"
+$FOREST_CLI_PATH snapshot export-diff --from "$snapshot_epoch" --to "$TO_EPOCH" -d "$DEPTH" -o diff1
@@
-echo "Exporting diff snapshot @ $snapshot_epoch with forest-tool archive export"
-$FOREST_TOOL_PATH archive export --epoch "$snapshot_epoch" --depth 900 --diff "$((snapshot_epoch - 900))" --diff-depth 900 -o diff2 "$snapshot"
+echo "Exporting diff snapshot @ $snapshot_epoch with forest-tool archive export"
+$FOREST_TOOL_PATH archive export --epoch "$snapshot_epoch" --depth "$DEPTH" --diff "$TO_EPOCH" --diff-depth "$DEPTH" -o diff2 "$snapshot"
@@
-echo "Comparing diff snapshots"
-diff diff1 diff2
+echo "Comparing diff snapshots (metadata)"
+$FOREST_TOOL_PATH archive info diff1 > info1.txt
+$FOREST_TOOL_PATH archive info diff2 > info2.txt
+diff -u info1.txt info2.txt

Optional: if strict byte-for-byte comparison is desired and deterministic, keep a second diff diff1 diff2 step.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
#!/usr/bin/env bash
# This script is checking the correctness of
# the diff snapshot export feature.
# It requires both the `forest` and `forest-cli` binaries to be in the PATH.
set -eu
source "$(dirname "$0")/harness.sh"
forest_init "$@"
db_path=$($FOREST_TOOL_PATH db stats --chain calibnet | grep "Database path:" | cut -d':' -f2- | xargs)
snapshot=$(find "$db_path/car_db"/*.car.zst | tail -n 1)
snapshot_epoch=$(forest_query_epoch "$snapshot")
echo "Exporting diff snapshot @ $snapshot_epoch with forest-cli snapshot export-diff"
$FOREST_CLI_PATH snapshot export-diff --from "$snapshot_epoch" --to "$((snapshot_epoch - 900))" -d 900 -o diff1
$FOREST_CLI_PATH shutdown --force
echo "Exporting diff snapshot @ $snapshot_epoch with forest-tool archive export"
$FOREST_TOOL_PATH archive export --epoch "$snapshot_epoch" --depth 900 --diff "$((snapshot_epoch - 900))" --diff-depth 900 -o diff2 "$snapshot"
echo "Comparing diff snapshots"
diff diff1 diff2
#!/usr/bin/env bash
# This script is checking the correctness of
# the diff snapshot export feature.
# It requires both the `forest` and `forest-cli` binaries to be in the PATH.
set -euo pipefail
source "$(dirname "$0")/harness.sh"
forest_init "$@"
db_path="$(
$FOREST_TOOL_PATH db stats --chain calibnet \
| grep -E "^Database path:" \
| cut -d':' -f2- \
| xargs
)"
# Pick latest snapshot by mtime; fail if none.
snapshot="$(ls -1t "$db_path"/car_db/*.car.zst 2>/dev/null | head -n1 || true)"
if [[ -z "${snapshot:-}" ]]; then
echo "Error: no *.car.zst snapshots found under: $db_path/car_db" >&2
exit 1
fi
snapshot_epoch="$(forest_query_epoch "$snapshot")"
DEPTH="${DEPTH:-900}"
TO_EPOCH="$((snapshot_epoch - DEPTH))"
echo "Exporting diff snapshot @ $snapshot_epoch with forest-cli snapshot export-diff (to=$TO_EPOCH depth=$DEPTH)"
$FOREST_CLI_PATH snapshot export-diff --from "$snapshot_epoch" --to "$TO_EPOCH" -d "$DEPTH" -o diff1
$FOREST_CLI_PATH shutdown --force
echo "Exporting diff snapshot @ $snapshot_epoch with forest-tool archive export"
$FOREST_TOOL_PATH archive export --epoch "$snapshot_epoch" --depth "$DEPTH" --diff "$TO_EPOCH" --diff-depth "$DEPTH" -o diff2 "$snapshot"
echo "Comparing diff snapshots (metadata)"
$FOREST_TOOL_PATH archive info diff1 > info1.txt
$FOREST_TOOL_PATH archive info diff2 > info2.txt
diff -u info1.txt info2.txt
🤖 Prompt for AI Agents
In scripts/tests/calibnet_export_diff_check.sh around lines 1 to 25, the script
is brittle: it lacks pipefail, doesn't fail if no snapshot is found, selects
snapshots non-deterministically, and uses a raw diff that can report false
differences due to compression framing. Add set -o pipefail alongside set -eu;
after finding snapshots, check and exit non-zero if none found; select the
newest snapshot deterministically (e.g. sort by mtime or use a reliable newest
selection) instead of relying on glob order; when comparing outputs, normalize
or decompress archives/metadata before diff (so compression framing won't
produce false mismatches) and keep an optional final strict byte-for-byte
comparison step if desired.

87 changes: 87 additions & 0 deletions src/cli/subcommands/snapshot_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::chain_sync::SyncConfig;
use crate::cli_shared::snapshot::{self, TrustedVendor};
use crate::db::car::forest::new_forest_car_temp_path_in;
use crate::networks::calibnet;
use crate::rpc::chain::ForestChainExportDiffParams;
use crate::rpc::{self, chain::ForestChainExportParams, prelude::*, types::ApiTipsetKey};
use anyhow::Context as _;
use chrono::DateTime;
Expand Down Expand Up @@ -40,6 +41,21 @@ pub enum SnapshotCommands {
#[arg(long, value_enum, default_value_t = FilecoinSnapshotVersion::V1)]
format: FilecoinSnapshotVersion,
},
/// Export a diff snapshot between `from` and `to` epochs to `<output_path>`
ExportDiff {
/// `./forest_snapshot_diff_{chain}_{from}_{to}+{depth}.car.zst`.
#[arg(short, long, default_value = ".", verbatim_doc_comment)]
output_path: PathBuf,
/// Epoch to export from
#[arg(long)]
from: i64,
/// Epoch to diff against
#[arg(long)]
to: i64,
/// How many state-roots to include. Lower limit is 900 for `calibnet` and `mainnet`.
#[arg(short, long)]
depth: Option<crate::chain::ChainEpochDelta>,
},
}

impl SnapshotCommands {
Expand Down Expand Up @@ -138,6 +154,77 @@ impl SnapshotCommands {
println!("Export completed.");
Ok(())
}
Self::ExportDiff {
output_path,
from,
to,
depth,
} => {
let raw_network_name = StateNetworkName::call(&client, ()).await?;

// For historical reasons and backwards compatibility if snapshot services or their
// consumers relied on the `calibnet`, we use `calibnet` as the chain name.
let chain_name = if raw_network_name == calibnet::NETWORK_GENESIS_NAME {
calibnet::NETWORK_COMMON_NAME
} else {
raw_network_name.as_str()
};

let depth = depth.unwrap_or_else(|| from - to);
Comment thread
hanabi1224 marked this conversation as resolved.
anyhow::ensure!(depth > 0, "depth must be positive");

let output_path = match output_path.is_dir() {
true => output_path.join(format!(
"forest_snapshot_diff_{chain_name}_{from}_{to}+{depth}.car.zst"
)),
false => output_path.clone(),
};

let output_dir = output_path.parent().context("invalid output path")?;
let temp_path = new_forest_car_temp_path_in(output_dir)?;

let params = ForestChainExportDiffParams {
output_path: temp_path.to_path_buf(),
from,
to,
depth,
};

let pb = ProgressBar::new_spinner().with_style(
ProgressStyle::with_template(
"{spinner} {msg} {binary_total_bytes} written in {elapsed} ({binary_bytes_per_sec})",
)
.expect("indicatif template must be valid"),
).with_message(format!("Exporting {} ...", output_path.display()));
pb.enable_steady_tick(std::time::Duration::from_millis(80));
let handle = tokio::spawn({
let path: PathBuf = (&temp_path).into();
let pb = pb.clone();
let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(1));
async move {
loop {
interval.tick().await;
if let Ok(meta) = std::fs::metadata(&path) {
pb.set_position(meta.len());
}
}
}
});

// Manually construct RpcRequest because snapshot export could
// take a few hours on mainnet
client
.call(ForestChainExportDiff::request((params,))?.with_timeout(Duration::MAX))
.await?;

handle.abort();
pb.finish();
_ = handle.await;

temp_path.persist(output_path)?;
println!("Export completed.");
Ok(())
}
}
}
}
Expand Down
18 changes: 9 additions & 9 deletions src/ipld/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ pin_project! {
db: DB,
dfs: VecDeque<Task>, // Depth-first work queue.
seen: CidHashSet,
stateroot_limit: ChainEpoch,
stateroot_limit_exclusive: ChainEpoch,
fail_on_dead_links: bool,
}
}
Expand Down Expand Up @@ -140,20 +140,20 @@ impl<DB, T> ChainStream<DB, T> {
///
/// * `db` - A database that implements [`Blockstore`] interface.
/// * `tipset_iter` - An iterator of [`Tipset`], descending order `$child -> $parent`.
/// * `stateroot_limit` - An epoch that signifies how far back we need to inspect tipsets,
/// * `stateroot_limit` - An epoch that signifies how far back (exclusive) we need to inspect tipsets,
/// in-depth. This has to be pre-calculated using this formula: `$cur_epoch - $depth`, where `$depth`
/// is the number of `[`Tipset`]` that needs inspection.
pub fn stream_chain<DB: Blockstore, T: Borrow<Tipset>, ITER: Iterator<Item = T> + Unpin>(
    db: DB,
    tipset_iter: ITER,
    stateroot_limit_exclusive: ChainEpoch,
) -> ChainStream<DB, ITER> {
    ChainStream {
        tipset_iter,
        db,
        // Depth-first traversal queue starts empty; tasks are pushed as
        // tipsets are pulled from `tipset_iter`.
        dfs: VecDeque::new(),
        // Tracks visited CIDs so each block is emitted at most once.
        seen: CidHashSet::default(),
        stateroot_limit_exclusive,
        // Dead links are fatal by default; `stream_graph` relaxes this.
        fail_on_dead_links: true,
    }
}
Expand All @@ -163,9 +163,9 @@ pub fn stream_chain<DB: Blockstore, T: Borrow<Tipset>, ITER: Iterator<Item = T>
/// Same as [`stream_chain`], but with `fail_on_dead_links` disabled, so the
/// stream tolerates links whose target is absent from the blockstore.
pub fn stream_graph<DB: Blockstore, T: Borrow<Tipset>, ITER: Iterator<Item = T> + Unpin>(
    db: DB,
    tipset_iter: ITER,
    stateroot_limit_exclusive: ChainEpoch,
) -> ChainStream<DB, ITER> {
    stream_chain(db, tipset_iter, stateroot_limit_exclusive).fail_on_dead_links(false)
}

impl<DB: Blockstore, T: Borrow<Tipset>, ITER: Iterator<Item = T> + Unpin> Stream
Expand All @@ -177,7 +177,7 @@ impl<DB: Blockstore, T: Borrow<Tipset>, ITER: Iterator<Item = T> + Unpin> Stream
use Task::*;

let fail_on_dead_links = self.fail_on_dead_links;
let stateroot_limit = self.stateroot_limit;
let stateroot_limit_exclusive = self.stateroot_limit_exclusive;
let this = self.project();

loop {
Expand Down Expand Up @@ -253,7 +253,7 @@ impl<DB: Blockstore, T: Borrow<Tipset>, ITER: Iterator<Item = T> + Unpin> Stream
}

// Process block messages.
if block.epoch > stateroot_limit {
if block.epoch > stateroot_limit_exclusive {
this.dfs.push_back(Iterate(
block.epoch,
*block.cid(),
Expand All @@ -266,7 +266,7 @@ impl<DB: Blockstore, T: Borrow<Tipset>, ITER: Iterator<Item = T> + Unpin> Stream

// Visit the block if it's within required depth. And a special case for `0`
// epoch to match Lotus' implementation.
if block.epoch == 0 || block.epoch > stateroot_limit {
if block.epoch == 0 || block.epoch > stateroot_limit_exclusive {
// NOTE: In the original `walk_snapshot` implementation we walk the dag
// immediately. Which is what we do here as well, but using a queue.
this.dfs.push_back(Iterate(
Expand Down
71 changes: 68 additions & 3 deletions src/rpc/methods/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ use tokio::task::JoinHandle;

const HEAD_CHANNEL_CAPACITY: usize = 10;

/// Serializes snapshot export jobs: `Forest.ChainExport` and
/// `Forest.ChainExportDiff` both `try_lock` this so only one export
/// runs at a time.
static CHAIN_EXPORT_LOCK: LazyLock<Mutex<()>> = LazyLock::new(|| Mutex::new(()));

Comment thread
LesnyRumcajs marked this conversation as resolved.
/// Subscribes to head changes from the chain store and broadcasts new blocks.
///
/// # Notes
Expand Down Expand Up @@ -325,9 +327,7 @@ impl RpcMethod<1> for ForestChainExport {
dry_run,
} = params;

static LOCK: LazyLock<Mutex<()>> = LazyLock::new(|| Mutex::new(()));

let _locked = LOCK.try_lock();
let _locked = CHAIN_EXPORT_LOCK.try_lock();
if _locked.is_err() {
return Err(anyhow::anyhow!("Another chain export job is still in progress").into());
}
Expand Down Expand Up @@ -404,6 +404,62 @@ impl RpcMethod<1> for ForestChainExport {
}
}

/// `Forest.ChainExportDiff`: exports a diff snapshot between two epochs to a
/// file on the node's local filesystem.
pub enum ForestChainExportDiff {}
impl RpcMethod<1> for ForestChainExportDiff {
    const NAME: &'static str = "Forest.ChainExportDiff";
    const PARAM_NAMES: [&'static str; 1] = ["params"];
    const API_PATHS: BitFlags<ApiPaths> = ApiPaths::all();
    const PERMISSION: Permission = Permission::Read;

    type Params = (ForestChainExportDiffParams,);
    type Ok = ();

    async fn handle(
        ctx: Ctx<impl Blockstore + Send + Sync + 'static>,
        (params,): Self::Params,
    ) -> Result<Self::Ok, ServerError> {
        let ForestChainExportDiffParams {
            from,
            to,
            depth,
            output_path,
        } = params;

        // Exports are serialized via the same global lock as
        // `Forest.ChainExport`; reject concurrent jobs instead of queueing.
        let _locked = CHAIN_EXPORT_LOCK.try_lock();
        if _locked.is_err() {
            return Err(
                anyhow::anyhow!("Another chain export diff job is still in progress").into(),
            );
        }

        // `depth == chain_finality` is accepted by the guard below, so the
        // message says "at least" (the previous "greater than" wording
        // contradicted the `<` check).
        let chain_finality = ctx.chain_config().policy.chain_finality;
        if depth < chain_finality {
            return Err(anyhow::anyhow!("depth must be at least {chain_finality}").into());
        }

        // Resolve the starting tipset at `from`, walking to an older tipset
        // if `from` lands on a null round.
        let head = ctx.chain_store().heaviest_tipset();
        let start_ts =
            ctx.chain_index()
                .tipset_by_height(from, head, ResolveNullTipset::TakeOlder)?;

        crate::tool::subcommands::archive_cmd::do_export(
            &ctx.store_owned(),
            start_ts,
            output_path,
            None,
            depth,
            Some(to),
            Some(chain_finality),
            true,
        )
        .await?;

        Ok(())
    }
}

pub enum ChainExport {}
impl RpcMethod<1> for ChainExport {
const NAME: &'static str = "Filecoin.ChainExport";
Expand Down Expand Up @@ -1000,6 +1056,15 @@ pub struct ForestChainExportParams {
}
lotus_json_with_self!(ForestChainExportParams);

/// Parameters for the `Forest.ChainExportDiff` RPC method.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ForestChainExportDiffParams {
    /// Epoch to export from.
    pub from: ChainEpoch,
    /// Epoch to diff against.
    pub to: ChainEpoch,
    /// How many state-roots to include in the export.
    pub depth: i64,
    /// Destination path for the exported snapshot, on the node's filesystem.
    pub output_path: PathBuf,
}
lotus_json_with_self!(ForestChainExportDiffParams);

#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ChainExportParams {
pub epoch: ChainEpoch,
Expand Down
1 change: 1 addition & 0 deletions src/rpc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ macro_rules! for_each_rpc_method {
$callback!($crate::rpc::chain::ChainStatObj);
$callback!($crate::rpc::chain::ChainTipSetWeight);
$callback!($crate::rpc::chain::ForestChainExport);
$callback!($crate::rpc::chain::ForestChainExportDiff);

// common vertical
$callback!($crate::rpc::common::Session);
Expand Down
1 change: 1 addition & 0 deletions src/tool/subcommands/api_cmd/test_snapshots_ignored.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ Filecoin.WalletSign
Filecoin.WalletSignMessage
Filecoin.Web3ClientVersion
Forest.ChainExport
Forest.ChainExportDiff
Forest.ChainGetMinBaseFee
Forest.NetInfo
Forest.SnapshotGC
Expand Down
Loading
Loading