diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 817fd9524df..22a66ce7ad5 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -311,6 +311,20 @@ jobs: cache-target: release - name: Run a beacon chain sim which tests VC fallback behaviour run: cargo run --release --bin simulator fallback-sim + peering-simulator-ubuntu: + name: peering-simulator-ubuntu + needs: [check-labels] + if: needs.check-labels.outputs.skip_ci != 'true' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Get latest version of stable Rust + uses: moonrepo/setup-rust@v1 + with: + channel: stable + cache-target: release + - name: Run a beacon chain sim which tests BN peering behaviour + run: cargo run --release --bin simulator peering-sim execution-engine-integration-ubuntu: name: execution-engine-integration-ubuntu needs: [check-labels] @@ -466,6 +480,7 @@ jobs: 'ef-tests-ubuntu', 'basic-simulator-ubuntu', 'fallback-simulator-ubuntu', + 'peering-simulator-ubuntu', 'execution-engine-integration-ubuntu', 'check-code', 'check-msrv', diff --git a/testing/simulator/Cargo.toml b/testing/simulator/Cargo.toml index 12b0afcc753..6d114cf7581 100644 --- a/testing/simulator/Cargo.toml +++ b/testing/simulator/Cargo.toml @@ -17,7 +17,7 @@ logging = { workspace = true } node_test_rig = { path = "../node_test_rig" } parking_lot = { workspace = true } rayon = { workspace = true } -sensitive_url = { path = "../../common/sensitive_url" } +sensitive_url = { path = "../../common/sensitive_url" } serde_json = { workspace = true } tokio = { workspace = true } tracing-subscriber = { workspace = true } diff --git a/testing/simulator/src/cli.rs b/testing/simulator/src/cli.rs index 3d61dcde74a..7830a243a2f 100644 --- a/testing/simulator/src/cli.rs +++ b/testing/simulator/src/cli.rs @@ -122,4 +122,54 @@ pub fn cli_app() -> Command { .help("Continue after checks (default false)"), ), ) + .subcommand( + Command::new("peering-sim") + .about( + "Runs a Beacon Chain simulation with `n` beacon node and validator clients, \ + each with `v` validators. \ + The simulation runs with a post-Merge Genesis using `mock-el`. \ + As the simulation runs, additional nodes are periodically added and \ + there are checks made to ensure that the nodes are able to sync to the \ + network. If a node fails to sync, the simulation will exit immediately.", + ) + .arg( + Arg::new("nodes") + .short('n') + .long("nodes") + .action(ArgAction::Set) + .default_value("2") + .help("Number of beacon nodes"), + ) + .arg( + Arg::new("validators-per-node") + .short('v') + .long("validators-per-node") + .action(ArgAction::Set) + .default_value("10") + .help("Number of validators"), + ) + .arg( + Arg::new("speed-up-factor") + .short('s') + .long("speed-up-factor") + .action(ArgAction::Set) + .default_value("3") + .help("Speed up factor. Please use a divisor of 12."), + ) + .arg( + Arg::new("debug-level") + .short('d') + .long("debug-level") + .action(ArgAction::Set) + .default_value("debug") + .help("Set the severity level of the logs."), + ) + .arg( + Arg::new("continue-after-checks") + .short('c') + .long("continue_after_checks") + .action(ArgAction::SetTrue) + .help("Continue after checks (default false)"), + ), + ) } diff --git a/testing/simulator/src/local_network.rs b/testing/simulator/src/local_network.rs index 3914d33f936..d832c01c33b 100644 --- a/testing/simulator/src/local_network.rs +++ b/testing/simulator/src/local_network.rs @@ -205,6 +205,7 @@ impl LocalNetwork { beacon_config.network.enr_udp4_port = Some(BOOTNODE_PORT.try_into().expect("non zero")); beacon_config.network.enr_tcp4_port = Some(BOOTNODE_PORT.try_into().expect("non zero")); beacon_config.network.discv5_config.table_filter = |_| true; + beacon_config.network.subscribe_all_data_column_subnets = true; let execution_node = LocalExecutionNode::new( self.context.service_context("boot_node_el".into()), diff --git a/testing/simulator/src/main.rs b/testing/simulator/src/main.rs index a259ac11339..fc1bf52ca3d 100644 --- a/testing/simulator/src/main.rs +++ b/testing/simulator/src/main.rs @@ -15,6 +15,7 @@ mod checks; mod cli; mod fallback_sim; mod local_network; +mod peering_sim; mod retry; use cli::cli_app; @@ -44,6 +45,13 @@ fn main() { std::process::exit(1) } }, + Some(("peering-sim", matches)) => match peering_sim::run_peering_sim(matches) { + Ok(()) => println!("Simulation exited successfully"), + Err(e) => { + eprintln!("Simulation exited with error: {}", e); + std::process::exit(1) + } + }, _ => { eprintln!("Invalid subcommand. Use --help to see available options"); std::process::exit(1) diff --git a/testing/simulator/src/peering_sim.rs b/testing/simulator/src/peering_sim.rs new file mode 100644 index 00000000000..32503140036 --- /dev/null +++ b/testing/simulator/src/peering_sim.rs @@ -0,0 +1,302 @@ +use crate::local_network::LocalNetworkParams; +use crate::{checks, LocalNetwork}; +use clap::ArgMatches; + +use crate::retry::with_retry; +use environment::tracing_common; +use futures::prelude::*; +use node_test_rig::{ + environment::{EnvironmentBuilder, LoggerConfig}, + testing_validator_config, ApiTopic, ValidatorFiles, +}; +use rayon::prelude::*; +use std::cmp::max; +use std::sync::Arc; +use std::time::Duration; +use tokio::time::sleep; +use tracing_subscriber::prelude::*; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; +use types::{Epoch, EthSpec, MinimalEthSpec}; + +const END_EPOCH: u64 = 16; +const GENESIS_DELAY: u64 = 32; +const ALTAIR_FORK_EPOCH: u64 = 0; +const BELLATRIX_FORK_EPOCH: u64 = 0; +const CAPELLA_FORK_EPOCH: u64 = 1; +const DENEB_FORK_EPOCH: u64 = 2; +const ELECTRA_FORK_EPOCH: u64 = 3; +const FULU_FORK_EPOCH: u64 = 8; + +const SUGGESTED_FEE_RECIPIENT: [u8; 20] = + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]; + +pub fn run_peering_sim(matches: &ArgMatches) -> Result<(), String> { + let node_count = matches + .get_one::("nodes") + .expect("missing nodes default") + .parse::() + .expect("missing nodes default"); + + // Extra beacon nodes added with delay + let extra_nodes = 4; + let validators_per_node = matches + .get_one::("validators-per-node") + .expect("missing validators-per-node default") + .parse::() + .expect("missing validators-per-node default"); + let speed_up_factor = matches + .get_one::("speed-up-factor") + .expect("missing speed-up-factor default") + .parse::() + .expect("missing speed-up-factor default"); + let log_level = matches + .get_one::("debug-level") + .expect("missing debug-level"); + + let continue_after_checks = matches.get_flag("continue-after-checks"); + + println!("Peering Simulator:"); + println!(" nodes: {}", node_count); + println!(" validators-per-node: {}", validators_per_node); + println!(" speed-up-factor: {}", speed_up_factor); + println!(" continue-after-checks: {}", continue_after_checks); + + // Generate the directories and keystores required for the validator clients. + let validator_files = (0..node_count) + .into_par_iter() + .map(|i| { + println!( + "Generating keystores for validator {} of {}", + i + 1, + node_count + ); + + let indices = + (i * validators_per_node..(i + 1) * validators_per_node).collect::>(); + ValidatorFiles::with_keystores(&indices).unwrap() + }) + .collect::>(); + + let ( + env_builder, + logger_config, + stdout_logging_layer, + _file_logging_layer, + _sse_logging_layer_opt, + _libp2p_discv5_layer, + ) = tracing_common::construct_logger( + LoggerConfig { + path: None, + debug_level: tracing_common::parse_level(&log_level.clone()), + logfile_debug_level: tracing_common::parse_level(&log_level.clone()), + log_format: None, + logfile_format: None, + log_color: true, + logfile_color: false, + disable_log_timestamp: false, + max_log_size: 0, + max_log_number: 0, + compression: false, + is_restricted: true, + sse_logging: false, + extra_info: false, + }, + matches, + EnvironmentBuilder::minimal(), + ); + + if let Err(e) = tracing_subscriber::registry() + .with(stdout_logging_layer.with_filter(logger_config.debug_level)) + .try_init() + { + eprintln!("Failed to initialize dependency logging: {e}"); + } + + let mut env = env_builder.multi_threaded_tokio_runtime()?.build()?; + + let mut spec = (*env.eth2_config.spec).clone(); + + let total_validator_count = validators_per_node * node_count; + let genesis_delay = GENESIS_DELAY; + + spec.seconds_per_slot /= speed_up_factor; + spec.seconds_per_slot = max(1, spec.seconds_per_slot); + spec.genesis_delay = genesis_delay; + spec.min_genesis_time = 0; + spec.min_genesis_active_validator_count = total_validator_count as u64; + spec.altair_fork_epoch = Some(Epoch::new(ALTAIR_FORK_EPOCH)); + spec.bellatrix_fork_epoch = Some(Epoch::new(BELLATRIX_FORK_EPOCH)); + spec.capella_fork_epoch = Some(Epoch::new(CAPELLA_FORK_EPOCH)); + spec.deneb_fork_epoch = Some(Epoch::new(DENEB_FORK_EPOCH)); + spec.electra_fork_epoch = Some(Epoch::new(ELECTRA_FORK_EPOCH)); + spec.fulu_fork_epoch = Some(Epoch::new(FULU_FORK_EPOCH)); + + let spec = Arc::new(spec); + env.eth2_config.spec = spec.clone(); + + let slot_duration = Duration::from_secs(spec.seconds_per_slot); + let slots_per_epoch = MinimalEthSpec::slots_per_epoch(); + + let context = env.core_context(); + + let main_future = async { + /* + * Create a new `LocalNetwork` with one beacon node. + */ + let max_retries = 3; + let (network, beacon_config, mock_execution_config) = with_retry(max_retries, || { + Box::pin(LocalNetwork::create_local_network( + None, + None, + LocalNetworkParams { + validator_count: total_validator_count, + node_count, + extra_nodes, + proposer_nodes: 0, + genesis_delay, + }, + context.clone(), + )) + }) + .await?; + + // Add nodes to the network. + for _ in 0..node_count { + network + .add_beacon_node(beacon_config.clone(), mock_execution_config.clone(), false) + .await?; + } + + /* + * One by one, add validators to the network. + */ + + let executor = context.executor.clone(); + for (i, files) in validator_files.into_iter().enumerate() { + let network_1 = network.clone(); + executor.spawn( + async move { + let mut validator_config = testing_validator_config(); + validator_config.validator_store.fee_recipient = + Some(SUGGESTED_FEE_RECIPIENT.into()); + println!("Adding validator client {}", i); + + // Enable broadcast on every 4th node. + if i % 4 == 0 { + validator_config.broadcast_topics = ApiTopic::all(); + let beacon_nodes = vec![i, (i + 1) % node_count]; + network_1 + .add_validator_client_with_fallbacks( + validator_config, + i, + beacon_nodes, + files, + ) + .await + } else { + network_1 + .add_validator_client(validator_config, i, files) + .await + } + .expect("should add validator"); + }, + "vc", + ); + } + + // Set all payloads as valid. This effectively assumes the EL is infalliable. + network.execution_nodes.write().iter().for_each(|node| { + node.server.all_payloads_valid(); + }); + + let duration_to_genesis = network.duration_to_genesis().await?; + println!("Duration to genesis: {}", duration_to_genesis.as_secs()); + sleep(duration_to_genesis).await; + + /* + * Start the checks that ensure the network performs as expected. + * + * We start these checks immediately after the validators have started. This means we're + * relying on the validator futures to all return immediately after genesis so that these + * tests start at the right time. Whilst this is works well for now, it's subject to + * breakage by changes to the VC. + */ + + let mut sequence = vec![]; + + let mut current_node_count = node_count; + + let available_epochs = END_EPOCH.saturating_sub(2); + + // Using floats here is kind of hacky, but it seems to work paired with the `ceil` later. + let step = if extra_nodes > 1 { + (available_epochs as f64) / (extra_nodes as f64) + } else { + 1.0 + }; + + for i in 0..extra_nodes { + let network_1 = network.clone(); + let owned_mock_execution_config = mock_execution_config.clone(); + let owned_beacon_config = beacon_config.clone(); + + let node_index = current_node_count; + + let epoch_delay = (i as f64 * step).ceil() as u64; + + sequence.push(async move { + network_1 + .add_beacon_node_with_delay( + owned_beacon_config, + owned_mock_execution_config, + epoch_delay, + slot_duration, + slots_per_epoch, + ) + .await?; + checks::ensure_node_synced_up_to_slot( + network_1, + node_index, + Epoch::new(END_EPOCH).start_slot(slots_per_epoch), + slot_duration, + ) + .await?; + Ok::<(), String>(()) + }); + current_node_count += 1; + } + + let futures = futures::future::join_all(sequence).await; + for res in futures { + res? + } + + // The `final_future` either completes immediately or never completes, depending on the value + // of `continue_after_checks`. + + if continue_after_checks { + future::pending::<()>().await; + } + /* + * End the simulation by dropping the network. This will kill all running beacon nodes and + * validator clients. + */ + println!( + "Simulation complete. Finished with {} beacon nodes and {} validator clients", + network.beacon_node_count() + network.proposer_node_count(), + network.validator_client_count() + ); + + // Be explicit about dropping the network, as this kills all the nodes. This ensures + // all the checks have adequate time to pass. + drop(network); + Ok::<(), String>(()) + }; + + env.runtime().block_on(main_future).unwrap(); + + env.fire_signal(); + env.shutdown_on_idle(); + + Ok(()) +}