Skip to content

Commit 91772a4

Browse files
authored
perf(db): Break loading storage logs for recovery into smaller chunks (#3947)
## What ❔ Uses smaller chunks (10,000 entries vs the 200,000 currently used) to load storage logs during Merkle tree and state keeper cache recovery. ## Why ❔ Benchmarking indicates that breaking loading into smaller chunks makes it faster. ## Is this a breaking change? - [ ] Yes - [x] No ## Operational changes No operational changes. ## Checklist - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`.
1 parent d611838 commit 91772a4

File tree

3 files changed

+95
-71
lines changed

3 files changed

+95
-71
lines changed

core/lib/dal/.sqlx/query-054dd5de059302fb2419782722c1c3852b3f7e49348931e3fa86102cfb702783.json

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/lib/dal/.sqlx/query-2ae0541e9af1a9966585a25dfe772cb2ea9f2209fe2c12dda6c72c96bdb496d3.json

Lines changed: 0 additions & 36 deletions
This file was deleted.

core/lib/dal/src/storage_logs_dal.rs

Lines changed: 58 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ use zksync_db_connection::{
88
write_str, writeln_str,
99
};
1010
use zksync_types::{
11-
get_code_key, snapshots::SnapshotStorageLog, AccountTreeId, Address, L1BatchNumber,
12-
L2BlockNumber, StorageKey, StorageLog, FAILED_CONTRACT_DEPLOYMENT_BYTECODE_HASH, H160, H256,
11+
get_code_key, h256_to_u256, snapshots::SnapshotStorageLog, u256_to_h256, AccountTreeId,
12+
Address, L1BatchNumber, L2BlockNumber, StorageKey, StorageLog,
13+
FAILED_CONTRACT_DEPLOYMENT_BYTECODE_HASH, H160, H256,
1314
};
1415

1516
pub use crate::models::storage_log::{DbStorageLog, StorageRecoveryLogEntry};
@@ -722,44 +723,66 @@ impl StorageLogsDal<'_, '_> {
722723
}
723724

724725
/// Fetches tree entries for the specified `l2_block_number` and `key_range`. This is used during
725-
/// Merkle tree recovery.
726+
/// Merkle tree and RocksDB cache recovery.
726727
pub async fn get_tree_entries_for_l2_block(
727728
&mut self,
728729
l2_block_number: L2BlockNumber,
729-
key_range: ops::RangeInclusive<H256>,
730+
mut key_range: ops::RangeInclusive<H256>,
730731
) -> DalResult<Vec<StorageRecoveryLogEntry>> {
731-
let rows = sqlx::query!(
732-
r#"
733-
SELECT
734-
storage_logs.hashed_key,
735-
storage_logs.value,
736-
initial_writes.index
737-
FROM
738-
storage_logs
739-
INNER JOIN initial_writes ON storage_logs.hashed_key = initial_writes.hashed_key
740-
WHERE
741-
storage_logs.miniblock_number <= $1
742-
AND storage_logs.hashed_key >= $2::bytea
743-
AND storage_logs.hashed_key <= $3::bytea
744-
ORDER BY
745-
storage_logs.hashed_key
746-
"#,
747-
i64::from(l2_block_number.0),
748-
key_range.start().as_bytes(),
749-
key_range.end().as_bytes()
750-
)
751-
.instrument("get_tree_entries_for_l2_block")
752-
.with_arg("l2_block_number", &l2_block_number)
753-
.with_arg("key_range", &key_range)
754-
.fetch_all(self.storage)
755-
.await?;
732+
const QUERY_LIMIT: usize = 10_000;
756733

757-
let rows = rows.into_iter().map(|row| StorageRecoveryLogEntry {
758-
key: H256::from_slice(&row.hashed_key),
759-
value: H256::from_slice(&row.value),
760-
leaf_index: row.index as u64,
761-
});
762-
Ok(rows.collect())
734+
// Break fetching from the DB into smaller chunks to make DB load more uniform.
735+
let mut entries = vec![];
736+
loop {
737+
let rows = sqlx::query!(
738+
r#"
739+
SELECT
740+
storage_logs.hashed_key,
741+
storage_logs.value,
742+
initial_writes.index
743+
FROM
744+
storage_logs
745+
INNER JOIN initial_writes ON storage_logs.hashed_key = initial_writes.hashed_key
746+
WHERE
747+
storage_logs.miniblock_number <= $1
748+
AND storage_logs.hashed_key >= $2::bytea
749+
AND storage_logs.hashed_key <= $3::bytea
750+
ORDER BY
751+
storage_logs.hashed_key
752+
LIMIT
753+
$4
754+
"#,
755+
i64::from(l2_block_number.0),
756+
key_range.start().as_bytes(),
757+
key_range.end().as_bytes(),
758+
QUERY_LIMIT as i32
759+
)
760+
.instrument("get_tree_entries_for_l2_block")
761+
.with_arg("l2_block_number", &l2_block_number)
762+
.with_arg("key_range", &key_range)
763+
.fetch_all(self.storage)
764+
.await?;
765+
766+
let fetched_count = rows.len();
767+
entries.extend(rows.into_iter().map(|row| StorageRecoveryLogEntry {
768+
key: H256::from_slice(&row.hashed_key),
769+
value: H256::from_slice(&row.value),
770+
leaf_index: row.index as u64,
771+
}));
772+
773+
if fetched_count < QUERY_LIMIT {
774+
break;
775+
}
776+
// `unwrap()` is safe: `entries` contains >= QUERY_LIMIT items.
777+
let Some(next_key) = h256_to_u256(entries.last().unwrap().key).checked_add(1.into())
778+
else {
779+
// A marginal case (likely not reproducible in practice): the last hashed key is `H256::repeat_byte(0xff)`.
780+
break;
781+
};
782+
key_range = u256_to_h256(next_key)..=*key_range.end();
783+
}
784+
785+
Ok(entries)
763786
}
764787

765788
/// Returns `true` if the number of logs at the specified L2 block is greater or equal to `min_count`.

0 commit comments

Comments (0)