Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 43 additions & 3 deletions substrate/utils/frame/benchmarking-cli/src/storage/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use sp_state_machine::Storage;
use sp_storage::{ChildInfo, ChildType, PrefixedStorageKey, StateVersion};

use clap::{Args, Parser, ValueEnum};
use log::info;
use log::{info, warn};
use rand::prelude::*;
use serde::Serialize;
use sp_runtime::generic::BlockId;
Expand Down Expand Up @@ -159,6 +159,19 @@ pub struct StorageParams {
/// This is only used when `mode` is `validate-block`.
#[arg(long, default_value_t = 20)]
pub validate_block_rounds: u32,

/// Maximum number of keys to read.
///
/// Declares the number of random keys to read.
///
/// Default: Read all keys.
#[arg(long)]
pub keys_limit: Option<usize>,

/// Seed to use for the benchmark's randomness; using the same seed allows
/// benchmarks to be replayed under the same conditions.
#[arg(long)]
pub random_seed: Option<u64>,
}

impl StorageParams {
Expand Down Expand Up @@ -248,8 +261,35 @@ impl StorageCmd {
BA: ClientBackend<B>,
{
let hash = client.usage_info().chain.best_hash;
let mut keys: Vec<_> = client.storage_keys(hash, None, None)?.collect();
let (mut rng, _) = new_rng(None);
// Use first_key + take(keys_limit) to avoid loading all keys on huge chains.
// If not enough keys after first_key, circle back and take keys from the start.
let mut keys: Vec<_> = if let Some(keys_limit) = self.params.keys_limit {
use sp_core::blake2_256;
let first_key = self
.params
.random_seed
.map(|seed| sp_storage::StorageKey(blake2_256(&seed.to_be_bytes()[..]).to_vec()));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are fewer than `keys_limit` keys after `first_key`, this will "break". We should instead just load all the keys and then do `sample_iter`.

This can then directly replace the shuffle call below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sample_iter seems to create new values (IIUC). What if we use choose_multiple here?

		let mut keys: Vec<_> = client.storage_keys(hash, None, None)?.collect();
		let (mut rng, _) = new_rng(self.params.random_seed);
		keys = keys.choose_multiple(&mut rng, self.params.keys_limit.unwrap_or(keys.len())).cloned().collect();

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah you can also use choose_multiple.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @bkchr. After discussing with the team, loading all keys is exactly what breaks the workflow (OOM) for our chains with huge storage.
But we get your `first_key` concern... let me try a different approach here and I will ping you back.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bkchr I just pushed a new approach that fetches more keys when necessary.
LMK what you think about it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturgontijo I'm fine with the approach, but can we get this into some shared function? :D

It can probably take two lambdas to abstract the different ways to read the entries.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a shared function in 6ed88c4.
I tried to simplify it even more but the complexity (mostly trait bounds) was getting too high.

let mut keys: Vec<_> =
client.storage_keys(hash, None, first_key.as_ref())?.take(keys_limit).collect();
if keys.len() < keys_limit {
let need_more = keys_limit - keys.len();
if let Some(ref fk) = first_key {
let keys_from_start: Vec<_> = client
.storage_keys(hash, None, None)?
.take_while(|k| k.0.as_slice() < fk.0.as_slice())
.take(need_more)
.collect();
keys.extend(keys_from_start);
}
if keys.len() < keys_limit {
warn!("Only {} keys available (requested {})", keys.len(), keys_limit);
}
}
keys
} else {
client.storage_keys(hash, None, None)?.collect()
};
let (mut rng, _) = new_rng(self.params.random_seed);
keys.shuffle(&mut rng);

for i in 0..self.params.warmups {
Expand Down
40 changes: 35 additions & 5 deletions substrate/utils/frame/benchmarking-cli/src/storage/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use codec::Encode;
use frame_storage_access_test_runtime::StorageAccessParams;
use log::{debug, info};
use log::{debug, info, warn};
use rand::prelude::*;
use sc_cli::{Error, Result};
use sc_client_api::{Backend as ClientBackend, StorageProvider, UsageProvider};
Expand Down Expand Up @@ -58,14 +58,44 @@ impl StorageCmd {
let best_hash = client.usage_info().chain.best_hash;

info!("Preparing keys from block {}", best_hash);
// Load all keys and randomly shuffle them.
let mut keys: Vec<_> = client.storage_keys(best_hash, None, None)?.collect();
let (mut rng, _) = new_rng(None);
keys.shuffle(&mut rng);
// Use first_key + take(keys_limit) to avoid loading all keys on huge chains.
// If not enough keys after first_key, circle back and take keys from the start.
let mut keys: Vec<_> = if let Some(keys_limit) = self.params.keys_limit {
use sp_core::blake2_256;
let first_key = self
.params
.random_seed
.map(|seed| sp_storage::StorageKey(blake2_256(&seed.to_be_bytes()[..]).to_vec()));
let mut keys: Vec<_> = client
.storage_keys(best_hash, None, first_key.as_ref())?
.take(keys_limit)
.collect();
if keys.len() < keys_limit {
let need_more = keys_limit - keys.len();
if let Some(ref fk) = first_key {
let keys_from_start: Vec<_> = client
.storage_keys(best_hash, None, None)?
.take_while(|k| k.0.as_slice() < fk.0.as_slice())
.take(need_more)
.collect();
keys.extend(keys_from_start);
}
if keys.len() < keys_limit {
warn!("Only {} keys available (requested {})", keys.len(), keys_limit);
}
}
keys
} else {
client.storage_keys(best_hash, None, None)?.collect()
};

if keys.is_empty() {
return Err("Can't process benchmarking with empty storage".into())
}

let (mut rng, _) = new_rng(self.params.random_seed);
keys.shuffle(&mut rng);

let mut child_nodes = Vec::new();
// Interesting part here:
// Read all the keys in the database and measure the time it takes to access each.
Expand Down
34 changes: 31 additions & 3 deletions substrate/utils/frame/benchmarking-cli/src/storage/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,37 @@ impl StorageCmd {
);

info!("Preparing keys from block {}", best_hash);
// Load all KV pairs and randomly shuffle them.
let mut kvs: Vec<_> = trie.pairs(Default::default())?.collect();
let (mut rng, _) = new_rng(None);
// Use start_at + take(keys_limit) to avoid loading all KV pairs on huge chains.
// If not enough pairs after start_at, circle back and take pairs from the start.
let mut kvs: Vec<_> = if let Some(keys_limit) = self.params.keys_limit {
let start_at = self
.params
.random_seed
.map(|seed| sp_core::blake2_256(&seed.to_be_bytes()[..]).to_vec());
let mut iter_args = sp_state_machine::IterArgs::default();
iter_args.start_at = start_at.as_deref();
let mut kvs: Vec<_> = trie.pairs(iter_args)?.take(keys_limit).collect();
if kvs.len() < keys_limit {
let need_more = keys_limit - kvs.len();
if let Some(ref start) = start_at {
let pairs_from_start: Vec<_> = trie
.pairs(Default::default())?
.take_while(|r| {
r.as_ref().map_or(false, |(k, _)| k.as_slice() < start.as_slice())
})
.take(need_more)
.collect();
kvs.extend(pairs_from_start);
}
if kvs.len() < keys_limit {
info!("Only {} KV pairs available (requested {})", kvs.len(), keys_limit);
}
}
kvs
} else {
trie.pairs(Default::default())?.collect()
};
let (mut rng, _) = new_rng(self.params.random_seed);
kvs.shuffle(&mut rng);
if kvs.is_empty() {
return Err("Can't process benchmarking with empty storage".into())
Expand Down
Loading