Skip to content

Commit ff4abf4

Browse files
committed
moar metrics
1 parent 5af18b2 commit ff4abf4

File tree

9 files changed

+283
-6
lines changed

9 files changed

+283
-6
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lean_client/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ clap = { workspace = true }
99
clap_utils = { workspace = true }
1010
lean_consensus = { workspace = true }
1111
environment = { workspace = true }
12+
health_metrics = { workspace = true }
13+
1214
lean_genesis = { workspace = true }
1315
lean_keystore = { path = "keystore" }
1416
lean_network = { workspace = true }

lean_client/lean_network/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ task_executor = { workspace = true }
2020
tokio = { workspace = true }
2121
tracing = { workspace = true }
2222
types = { workspace = true }
23+
metrics = { workspace = true }
24+
2325

2426
[dependencies.libp2p]
2527
version = "0.56"

lean_client/lean_network/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ mod bootstrap;
22
mod config;
33
mod service;
44
mod topics;
5+
mod metrics;
6+
57

68
pub use bootstrap::load_bootstrap_nodes;
79
pub use config::NetworkConfig;
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
pub use metrics::*;
2+
use std::sync::LazyLock;
3+
4+
/* P2P Metrics */
5+
pub static LEAN_P2P_PEERS: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
6+
try_create_int_gauge("lean_p2p_peers", "Total number of connected peers")
7+
});
8+
9+
pub static LEAN_P2P_MESSAGES_RECEIVED_TOTAL: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
10+
try_create_int_counter_vec(
11+
"lean_p2p_messages_received_total",
12+
"Total number of gossip messages received",
13+
&["topic"],
14+
)
15+
});
16+
17+
pub static LEAN_P2P_MESSAGES_PUBLISHED_TOTAL: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
18+
try_create_int_counter_vec(
19+
"lean_p2p_messages_published_total",
20+
"Total number of gossip messages published",
21+
&["topic"],
22+
)
23+
});

lean_client/lean_network/src/service.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use std::time::{Duration, Instant};
2020
use tokio::sync::mpsc;
2121
use tracing::{debug, info, trace, warn};
2222
use types::EthSpec;
23+
use crate::metrics;
24+
2325

2426
/// Domain prefix for valid snappy-compressed messages per Eth2 networking spec
2527
/// This is prepended to message data before hashing to create unique message IDs
@@ -236,6 +238,9 @@ impl<E: EthSpec> NetworkService<E> {
236238
return None;
237239
};
238240

241+
metrics::inc_counter_vec(&*metrics::LEAN_P2P_MESSAGES_RECEIVED_TOTAL, &[topic]);
242+
243+
239244
// Decompress snappy-compressed message
240245
let decompressed = match self.decompress_snappy(data) {
241246
Ok(d) => d,
@@ -385,14 +390,17 @@ impl<E: EthSpec> NetworkService<E> {
385390
"Connection established with peer: {:?} at {:?}",
386391
peer_id, endpoint
387392
);
388-
// Mark matching bootstrap nodes as connected
389393
self.mark_bootstrap_node_connected_by_endpoint(&endpoint);
394+
metrics::inc_gauge(&*metrics::LEAN_P2P_PEERS);
390395
}
391396
SwarmEvent::ConnectionClosed {
392397
peer_id, cause, ..
393398
} => {
394399
debug!("Connection closed with peer: {:?}, cause: {:?}", peer_id, cause);
400+
metrics::dec_gauge(&*metrics::LEAN_P2P_PEERS);
395401
}
402+
403+
396404
SwarmEvent::IncomingConnection { .. } => {
397405
debug!("Incoming connection");
398406
}
@@ -469,7 +477,10 @@ impl<E: EthSpec> NetworkService<E> {
469477
);
470478
} else {
471479
debug!("Successfully published message to topic: {}", encoded_topic);
480+
metrics::inc_counter_vec(&*metrics::LEAN_P2P_MESSAGES_PUBLISHED_TOTAL, &[&encoded_topic]);
472481
}
482+
483+
473484
}
474485

475486
/// Attempts to connect to bootstrap nodes that are not yet connected

lean_client/src/http_metrics.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,12 @@ pub fn serve(
5353
.and_then(|| async move {
5454
let mut buffer = String::new();
5555
let encoder = metrics::TextEncoder::new();
56+
57+
// Scrape health metrics (CPU, Memory, etc.)
58+
health_metrics::metrics::scrape_health_metrics();
59+
5660
// metrics::gather() returns Vec<MetricFamily>, which is what we want to encode.
61+
5762
match encoder.encode_utf8(&metrics::gather(), &mut buffer) {
5863
Ok(()) => Ok::<_, warp::Rejection>(
5964
Response::builder()

lean_client/validator_service/src/lib.rs

Lines changed: 84 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ impl<T: SlotClock + 'static, E: EthSpec, D: KeyValueStore<E>> ValidatorService<T
7070
// Ensure the public key JSON is well-formed even if we do not store it explicitly.
7171
let _ = validator_key_pair.hashsig_public_key()?;
7272

73+
// Ensure all validator metrics are registered in the global Prometheus
74+
// registry so that the /metrics endpoint exposes the full Zeam metric
75+
// set even before any events have occurred.
76+
metrics::init();
77+
7378
Ok(Self {
7479
network_recv,
7580
network_send,
@@ -298,6 +303,28 @@ impl<T: SlotClock + 'static, E: EthSpec, D: KeyValueStore<E>> ValidatorService<T
298303
let justified = &state.latest_justified;
299304
let finalized = &state.latest_finalized;
300305

306+
// Update chain status metrics
307+
if let Ok(gauge) = &*metrics::LEAN_HEAD_SLOT {
308+
gauge.set(head_block.slot.0 as i64);
309+
}
310+
if let Ok(gauge) = &*metrics::LEAN_LATEST_JUSTIFIED_SLOT {
311+
gauge.set(justified.slot.0 as i64);
312+
}
313+
if let Ok(gauge) = &*metrics::LEAN_LATEST_FINALIZED_SLOT {
314+
gauge.set(finalized.slot.0 as i64);
315+
}
316+
if let Ok(safe_root) = self.chain.fetch_safe_target()
317+
&& let Some(root) = safe_root
318+
&& let Ok(Some(block)) = self.chain.fetch_block(root)
319+
&& let Ok(gauge) = &*metrics::LEAN_LATEST_SAFE_SLOT
320+
{
321+
gauge.set(block.slot.0 as i64);
322+
}
323+
if let Ok(gauge) = &*metrics::LEAN_VALIDATORS_COUNT {
324+
gauge.set(state.validators.len() as i64);
325+
}
326+
327+
301328
let sep = "=".repeat(63);
302329
info!(
303330
separator = %sep,
@@ -533,7 +560,15 @@ impl<T: SlotClock + 'static, E: EthSpec, D: KeyValueStore<E>> ValidatorService<T
533560
// NOTE: Signature verification will be implemented when signature verification functions
534561
// become available from the leansig library. For now, we accept all attestations.
535562
let _timer = metrics::start_timer(&metrics::LEAN_PQ_SIGNATURE_ATTESTATION_VERIFICATION_TIME);
563+
let _validation_timer = metrics::start_timer(&metrics::LEAN_ATTESTATION_VALIDATION_TIME);
564+
536565
// [PLACEHOLDER] verification logic
566+
// For now considering all valid
567+
if let Ok(counter) = &*metrics::LEAN_ATTESTATIONS_VALID_TOTAL {
568+
counter.inc();
569+
}
570+
571+
drop(_validation_timer);
537572
drop(_timer);
538573

539574
debug!(validator_id, epoch, "Attestation received and accepted");
@@ -687,6 +722,10 @@ impl<T: SlotClock + 'static, E: EthSpec, D: KeyValueStore<E>> ValidatorService<T
687722
let proposer_attestation = &signed_block.message.proposer_attestation;
688723
let block_root = block.tree_hash_root();
689724

725+
// Track total time spent in the on_block-style handler, matching the
726+
// Zeam / beacon-node `chain_onblock_duration_seconds` metric name.
727+
let _onblock_timer = metrics::start_timer(&metrics::CHAIN_ONBLOCK_DURATION_SECONDS);
728+
690729
info!(
691730
slot = block.slot.0,
692731
proposer_index = block.proposer_index,
@@ -745,13 +784,51 @@ impl<T: SlotClock + 'static, E: EthSpec, D: KeyValueStore<E>> ValidatorService<T
745784

746785
debug!("Cloned parent state for state transition");
747786

748-
// Validate cryptographic signatures
749-
//
750-
// XMSS signature verification is now implemented using leansig.
787+
// Execute state transition function with metrics
788+
let _block_processing_timer =
789+
metrics::start_timer(&metrics::BLOCK_PROCESSING_DURATION_SECONDS);
790+
let _state_timer = metrics::start_timer(&metrics::LEAN_STATE_TRANSITION_TIME);
791+
751792
let valid_signatures = true;
793+
794+
// Verify signatures if enabled
795+
if valid_signatures {
796+
signed_block.verify_signatures(&post_state)?;
797+
}
798+
799+
let block = &signed_block.message.block;
752800

753-
// Execute state transition function to compute post-block state
754-
post_state.state_transition(&signed_block, valid_signatures)?;
801+
// Process slots (catch up)
802+
if post_state.slot < block.slot {
803+
let slots_processed = block.slot.0.saturating_sub(post_state.slot.0);
804+
if let Ok(counter) = &*metrics::LEAN_STATE_TRANSITION_SLOTS_PROCESSED_TOTAL {
805+
counter.inc_by(slots_processed);
806+
}
807+
808+
let _slots_timer = metrics::start_timer(&metrics::LEAN_STATE_TRANSITION_SLOTS_PROCESSING_TIME);
809+
post_state.process_slots(block.slot)?;
810+
drop(_slots_timer);
811+
}
812+
813+
// Process block
814+
if let Ok(counter) = &*metrics::LEAN_STATE_TRANSITION_ATTESTATIONS_PROCESSED_TOTAL {
815+
counter.inc_by(block.body.attestations.len() as u64);
816+
}
817+
let _block_timer = metrics::start_timer(&metrics::LEAN_STATE_TRANSITION_BLOCK_PROCESSING_TIME);
818+
post_state.process_block(block)?;
819+
drop(_block_timer);
820+
821+
// Verify state root
822+
let computed_state_root = post_state.tree_hash_root();
823+
if block.state_root != computed_state_root {
824+
return Err(format!(
825+
"Invalid block state root. Expected: {:?}, got: {:?}",
826+
computed_state_root, block.state_root
827+
));
828+
}
829+
830+
drop(_block_processing_timer);
831+
drop(_state_timer);
755832

756833
debug!(
757834
post_slot = post_state.slot.0,
@@ -772,8 +849,10 @@ impl<T: SlotClock + 'static, E: EthSpec, D: KeyValueStore<E>> ValidatorService<T
772849
debug!(?block_root, "Block and state saved to cache and database");
773850

774851
// Ensure parent chain exists in proto array and register the new block.
852+
let _fc_timer = metrics::start_timer(&metrics::LEAN_FORK_CHOICE_BLOCK_PROCESSING_TIME);
775853
self.chain
776854
.register_block(block_root, block.slot, block.parent_root)?;
855+
drop(_fc_timer);
777856

778857
// Process block body attestations
779858
//

0 commit comments

Comments
 (0)