From 142c8ca5ba3bf31580e6e03fd500d17d9321cec8 Mon Sep 17 00:00:00 2001 From: Fridrik Asmundsson Date: Thu, 23 Oct 2025 15:10:52 +0000 Subject: [PATCH 1/5] Use prometheus client directly for metrics + remove go-metrics dependency Remove use of cosmossdk.io/telemetry package which relies on go-metrics and replace it with direct use of prometheus/client_golang. This aligns with how CometBFT handles metrics and brings several benefits: eliminates race conditions from go-metrics, avoids questionable maintenance of upstream repo, removes problematic metric retention, and significantly simplifies the metrics codebase. All beacon-kit services now use prometheus client directly through a unified metrics.Factory interface. Additionally, replace cosmossdk.io/store/metrics imports with a custom storage.NoOpStoreMetrics implementation to avoid pulling in the telemetry wrapper. While go-metrics remains as a transitive dependency through cosmossdk.io/store/types, it is no longer used at runtime. --- .golangci.yaml | 4 +- beacon/blockchain/blob_fetcher.go | 6 +- beacon/blockchain/blob_fetcher_metrics.go | 82 +++- beacon/blockchain/blob_queue.go | 4 +- beacon/blockchain/blob_queue_test.go | 18 +- beacon/blockchain/deposit.go | 12 +- beacon/blockchain/interfaces.go | 15 - beacon/blockchain/metrics.go | 162 ++++---- beacon/blockchain/payload_test.go | 6 +- beacon/blockchain/service.go | 6 +- beacon/validator/interfaces.go | 11 - beacon/validator/metrics.go | 92 +++-- beacon/validator/service.go | 6 +- cmd/beacond/defaults.go | 4 +- config/config/config.go | 16 +- config/config/config.toml.tpl | 42 +- consensus/cometbft/service/abci.go | 6 +- consensus/cometbft/service/interfaces.go | 10 - consensus/cometbft/service/metrics.go | 104 +++++ .../cometbft/service/prepare_proposal.go | 3 +- .../cometbft/service/process_proposal.go | 3 +- consensus/cometbft/service/service.go | 6 +- consensus/cometbft/service/state/state.go | 4 +- da/blob/factory.go | 6 +- da/blob/factory_metrics.go | 99 
+++-- da/blob/interfaces.go | 11 - da/blob/processor.go | 9 +- da/blob/processor_metrics.go | 58 +-- da/blob/verifier.go | 6 +- da/blob/verifier_metrics.go | 93 +++-- da/blobreactor/interfaces.go | 14 - da/blobreactor/metrics.go | 104 ++++- da/blobreactor/reactor.go | 6 +- da/blobreactor/reactor_test.go | 9 +- da/types/sidecar_test.go | 10 +- execution/client/client.go | 8 +- execution/client/interfaces.go | 14 - execution/client/metrics.go | 360 ++++++++++++------ execution/engine/engine.go | 6 +- execution/engine/interfaces.go | 7 - execution/engine/metrics.go | 318 ++++++++++------ go.mod | 6 +- node-api/backend/backend.go | 13 +- node-api/backend/backend_test.go | 4 +- node-api/handlers/beacon/genesis_test.go | 13 +- .../beacon/validators_filters_test.go | 4 +- node-core/components/backend.go | 6 +- node-core/components/blob_fetcher.go | 17 +- node-core/components/blob_reactor.go | 11 +- node-core/components/blobs.go | 7 +- node-core/components/chain_service.go | 5 +- ...lemetry_service.go => cometbft_metrics.go} | 14 +- node-core/components/cometbft_service.go | 5 +- node-core/components/engine.go | 19 +- node-core/components/interfaces.go | 7 - node-core/components/metrics/sink.go | 97 ----- .../components/metrics_factory.go | 50 +-- node-core/components/metrics_providers.go | 114 ++++++ node-core/components/reporting_service.go | 9 +- node-core/components/service_registry.go | 5 - node-core/components/sidecars.go | 5 +- node-core/components/state_processor.go | 15 +- node-core/components/storage/storage.go | 8 +- node-core/components/validator_service.go | 5 +- node-core/services/version/interfaces.go | 10 - node-core/services/version/version.go | 21 - observability/metrics/discard/discard.go | 114 ++++++ observability/metrics/discard/discard_test.go | 109 ++++++ observability/metrics/lv/labelvalues.go | 55 +++ observability/metrics/lv/labelvalues_test.go | 102 +++++ observability/metrics/metrics.go | 101 +++++ .../metrics/prometheus/prometheus.go | 227 
+++++++++++ .../metrics/prometheus/prometheus_test.go | 182 +++++++++ state-transition/core/interfaces.go | 8 - state-transition/core/metrics.go | 128 +++++-- state-transition/core/state/interfaces.go | 5 - state-transition/core/state/metrics.go | 48 ++- state-transition/core/state/statedb.go | 18 +- state-transition/core/state/statedb_test.go | 4 +- state-transition/core/state_processor.go | 6 +- .../core/state_processor_payload_test.go | 4 +- storage/beacondb/registry_test.go | 9 +- .../noop_metrics.go | 16 +- testing/networks/80069/app.toml | 40 +- testing/networks/80094/app.toml | 40 +- .../blob_fetcher_integration_test.go | 10 +- testing/simulated/components.go | 4 +- testing/simulated/malicious_proposer_test.go | 6 +- testing/simulated/simcomet.go | 5 +- testing/simulated/valid_chain_test.go | 4 +- testing/state-transition/state-transition.go | 15 +- 91 files changed, 2361 insertions(+), 1129 deletions(-) create mode 100644 consensus/cometbft/service/metrics.go rename node-core/components/{telemetry_service.go => cometbft_metrics.go} (70%) delete mode 100644 node-core/components/metrics/sink.go rename observability/telemetry/service.go => node-core/components/metrics_factory.go (50%) create mode 100644 node-core/components/metrics_providers.go create mode 100644 observability/metrics/discard/discard.go create mode 100644 observability/metrics/discard/discard_test.go create mode 100644 observability/metrics/lv/labelvalues.go create mode 100644 observability/metrics/lv/labelvalues_test.go create mode 100644 observability/metrics/metrics.go create mode 100644 observability/metrics/prometheus/prometheus.go create mode 100644 observability/metrics/prometheus/prometheus_test.go rename node-core/components/telemetry_sink.go => storage/noop_metrics.go (62%) diff --git a/.golangci.yaml b/.golangci.yaml index cca2522595..8619a0d382 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -288,7 +288,7 @@ linters: - nosprintfhostport # checks for misuse of Sprintf to 
construct a host with port in a URL - perfsprint # checks that fmt.Sprintf can be replaced with a faster alternative - predeclared # finds code that shadows one of Go's predeclared identifiers - - promlinter # checks Prometheus metrics naming via promlint + #- promlinter # checks Prometheus metrics naming via promlint - protogetter # reports direct reads from proto message fields when getters should be used - reassign # checks that package variables are not reassigned - revive # fast, configurable, extensible, flexible, and beautiful linter for Go, drop-in replacement of golint @@ -373,4 +373,4 @@ issues: - thelper exclude-files: - "pkg/cometbft/cli/.*\\.go" - - "pkg/cometbft/service/server/.*\\.go" \ No newline at end of file + - "pkg/cometbft/service/server/.*\\.go" diff --git a/beacon/blockchain/blob_fetcher.go b/beacon/blockchain/blob_fetcher.go index c350856387..2aa0f92821 100644 --- a/beacon/blockchain/blob_fetcher.go +++ b/beacon/blockchain/blob_fetcher.go @@ -60,7 +60,7 @@ type blobFetcher struct { queue *blobQueue // Queue for persistent requests executor *blobFetchExecutor // Executor for fetch logic config BlobFetcherConfig // Configuration - metrics *blobFetcherMetrics + metrics *BlobFetcherMetrics // We need to track current head slot so we know when blob download requests need to be pruned as they are outside the WithinDAPeriod headSlotMu sync.RWMutex @@ -80,10 +80,8 @@ func NewBlobFetcher( storageBackend StorageBackend, chainSpec BlobFetcherChainSpec, config BlobFetcherConfig, - telemetrySink TelemetrySink, + metrics *BlobFetcherMetrics, ) (BlobFetcher, error) { - metrics := newBlobFetcherMetrics(telemetrySink) - queue, err := newBlobQueue(filepath.Join(dataDir, "blobs", "download_queue"), logger, metrics) if err != nil { return nil, err diff --git a/beacon/blockchain/blob_fetcher_metrics.go b/beacon/blockchain/blob_fetcher_metrics.go index d87cce3c93..4c49a30af8 100644 --- a/beacon/blockchain/blob_fetcher_metrics.go +++ 
b/beacon/blockchain/blob_fetcher_metrics.go @@ -20,44 +20,94 @@ package blockchain +import ( + "github.com/berachain/beacon-kit/observability/metrics" +) + // Metric reason constants for blob fetcher. const ( expiredReasonOutsideDA = "outside_da_period" expiredReasonMaxRetries = "max_retries" ) -// blobFetcherMetrics contains metrics for the blob fetcher queue and retry operations. -type blobFetcherMetrics struct { - sink TelemetrySink +// BlobFetcherMetrics contains metrics for the blob fetcher queue and retry operations. +type BlobFetcherMetrics struct { + RetriesTotal metrics.Counter + RequestsExpiredTotal metrics.Counter + RequestsCompletedTotal metrics.Counter + RequestsQueuedTotal metrics.Counter + QueueDepth metrics.Gauge } -// newBlobFetcherMetrics creates a new blobFetcherMetrics instance. -func newBlobFetcherMetrics(sink TelemetrySink) *blobFetcherMetrics { - return &blobFetcherMetrics{sink: sink} +// NewBlobFetcherMetrics returns a new BlobFetcherMetrics instance with metrics from the provided factory. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
+func NewBlobFetcherMetrics(factory metrics.Factory) *BlobFetcherMetrics { + return &BlobFetcherMetrics{ + RetriesTotal: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blob_fetcher", + Name: "retries_total", + Help: "Number of times a blob request was retried after failure", + }, + nil, + ), + RequestsExpiredTotal: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blob_fetcher", + Name: "requests_expired_total", + Help: "Number of blob fetch requests that expired before completion", + }, + []string{"reason"}, + ), + RequestsCompletedTotal: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blob_fetcher", + Name: "requests_completed_total", + Help: "Number of blob fetch requests that completed successfully", + }, + nil, + ), + RequestsQueuedTotal: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blob_fetcher", + Name: "requests_queued_total", + Help: "Number of new blob fetch requests added to the queue", + }, + nil, + ), + QueueDepth: factory.NewGauge( + metrics.GaugeOpts{ + Subsystem: "blob_fetcher", + Name: "queue_depth", + Help: "Current depth of the blob fetcher queue", + }, + nil, + ), + } } // recordRetry increments counter when a blob request is retried after failure. -func (m *blobFetcherMetrics) recordRetry() { - m.sink.IncrementCounter("beacon_kit.blob_fetcher.retries_total") +func (m *BlobFetcherMetrics) recordRetry() { + m.RetriesTotal.Add(1) } // recordRequestExpired increments counter when request expires before completion. // Reason: "outside_da_period", "max_retries" -func (m *blobFetcherMetrics) recordRequestExpired(reason string) { - m.sink.IncrementCounter("beacon_kit.blob_fetcher.requests_expired_total", "reason", reason) +func (m *BlobFetcherMetrics) recordRequestExpired(reason string) { + m.RequestsExpiredTotal.With("reason", reason).Add(1) } // recordRequestComplete increments counter when request completes successfully. 
-func (m *blobFetcherMetrics) recordRequestComplete() { - m.sink.IncrementCounter("beacon_kit.blob_fetcher.requests_completed_total") +func (m *BlobFetcherMetrics) recordRequestComplete() { + m.RequestsCompletedTotal.Add(1) } // recordRequestQueued increments counter when a new request is added to queue. -func (m *blobFetcherMetrics) recordRequestQueued() { - m.sink.IncrementCounter("beacon_kit.blob_fetcher.requests_queued_total") +func (m *BlobFetcherMetrics) recordRequestQueued() { + m.RequestsQueuedTotal.Add(1) } // setQueueDepth sets the current depth of the blob fetcher queue. -func (m *blobFetcherMetrics) setQueueDepth(depth int) { - m.sink.SetGauge("beacon_kit.blob_fetcher.queue_depth", int64(depth)) +func (m *BlobFetcherMetrics) setQueueDepth(depth int) { + m.QueueDepth.Set(float64(depth)) } diff --git a/beacon/blockchain/blob_queue.go b/beacon/blockchain/blob_queue.go index c4b07fd391..11155b069a 100644 --- a/beacon/blockchain/blob_queue.go +++ b/beacon/blockchain/blob_queue.go @@ -52,12 +52,12 @@ type BlobFetchRequest struct { type blobQueue struct { queueDir string logger log.Logger - metrics *blobFetcherMetrics + metrics *BlobFetcherMetrics } // newBlobQueue creates a new blob queue with the given directory. // It creates the directory if it doesn't exist and cleans up orphaned temp files. 
-func newBlobQueue(queueDir string, logger log.Logger, metrics *blobFetcherMetrics) (*blobQueue, error) { +func newBlobQueue(queueDir string, logger log.Logger, metrics *BlobFetcherMetrics) (*blobQueue, error) { // Create queue directory if err := os.MkdirAll(queueDir, 0750); err != nil { return nil, fmt.Errorf("failed to create blob download queue directory: %w", err) diff --git a/beacon/blockchain/blob_queue_test.go b/beacon/blockchain/blob_queue_test.go index fa96c4d69f..5f817e980f 100644 --- a/beacon/blockchain/blob_queue_test.go +++ b/beacon/blockchain/blob_queue_test.go @@ -30,7 +30,7 @@ import ( "cosmossdk.io/log" ctypes "github.com/berachain/beacon-kit/consensus-types/types" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/eip4844" "github.com/berachain/beacon-kit/primitives/math" @@ -47,7 +47,7 @@ func createTestBlobRequest(slot math.Slot, blobCount int) BlobFetchRequest { func TestBlobQueue_SuccessfulWrite(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) require.NoError(t, err) slot := math.Slot(100) @@ -71,7 +71,7 @@ func TestBlobQueue_SuccessfulWrite(t *testing.T) { func TestBlobQueue_RetryLogic(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) require.NoError(t, err) withinDA := func(_, _ math.Slot) bool { return true } @@ -103,7 +103,7 @@ func TestBlobQueue_RetryLogic(t *testing.T) { func TestBlobQueue_AvailabilityWindow(t *testing.T) { t.Parallel() 
tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) require.NoError(t, err) // Add old request @@ -131,7 +131,7 @@ func TestBlobQueue_AvailabilityWindow(t *testing.T) { func TestBlobQueue_UpdateRetry(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) require.NoError(t, err) request := createTestBlobRequest(math.Slot(100), 2) @@ -157,7 +157,7 @@ func TestBlobQueue_UpdateRetry(t *testing.T) { func TestBlobQueue_ProcessingOrder(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) require.NoError(t, err) withinDA := func(_, _ math.Slot) bool { return true } @@ -188,7 +188,7 @@ func TestBlobQueue_ProcessingOrder(t *testing.T) { func TestBlobQueue_MaxRetryLimit(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) require.NoError(t, err) withinDA := func(_, _ math.Slot) bool { return true } @@ -217,7 +217,7 @@ func TestBlobQueue_MaxRetryLimit(t *testing.T) { func TestBlobQueue_UnderRetryLimit(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) 
require.NoError(t, err) withinDA := func(_, _ math.Slot) bool { return true } @@ -245,7 +245,7 @@ func TestBlobQueue_UnderRetryLimit(t *testing.T) { func TestBlobQueue_CorruptedFileHandling(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), newBlobFetcherMetrics(metrics.NewNoOpTelemetrySink())) + queue, err := newBlobQueue(tmpDir, log.NewNopLogger(), NewBlobFetcherMetrics(discard.NewFactory())) require.NoError(t, err) withinDA := func(_, _ math.Slot) bool { return true } diff --git a/beacon/blockchain/deposit.go b/beacon/blockchain/deposit.go index 2824a45166..11e8217470 100644 --- a/beacon/blockchain/deposit.go +++ b/beacon/blockchain/deposit.go @@ -59,11 +59,7 @@ func (s *Service) fetchAndStoreDeposits( deposits, err := s.depositContract.ReadDeposits(ctx, blockNum, blockNum) if err != nil { s.logger.Error("Failed to read deposits", "error", err) - s.metrics.sink.IncrementCounter( - "beacon_kit.execution.deposit.failed_to_get_block_logs", - "block_num", - blockNumStr, - ) + s.metrics.FailedToGetBlockLogs.With("block_num", blockNumStr).Add(1) s.failedBlocksMu.Lock() s.failedBlocks[blockNum] = struct{}{} s.failedBlocksMu.Unlock() @@ -79,11 +75,7 @@ func (s *Service) fetchAndStoreDeposits( if err = s.storageBackend.DepositStore().EnqueueDeposits(ctx, deposits); err != nil { s.logger.Error("Failed to store deposits", "error", err) - s.metrics.sink.IncrementCounter( - "beacon_kit.execution.deposit.failed_to_enqueue_deposits", - "block_num", - blockNumStr, - ) + s.metrics.FailedToEnqueueDeposits.With("block_num", blockNumStr).Add(1) s.failedBlocksMu.Lock() s.failedBlocks[blockNum] = struct{}{} s.failedBlocksMu.Unlock() diff --git a/beacon/blockchain/interfaces.go b/beacon/blockchain/interfaces.go index 0a7d104f49..3540ffd0e3 100644 --- a/beacon/blockchain/interfaces.go +++ b/beacon/blockchain/interfaces.go @@ -22,7 +22,6 @@ package blockchain import ( "context" - "time" "github.com/berachain/beacon-kit/chain" 
ctypes "github.com/berachain/beacon-kit/consensus-types/types" @@ -133,20 +132,6 @@ type StorageBackend interface { BlockStore() *block.KVStore[*ctypes.BeaconBlock] } -// TelemetrySink is an interface for sending metrics to a telemetry backend. -type TelemetrySink interface { - // IncrementCounter increments the counter identified by - // the provided key. - IncrementCounter(key string, args ...string) - - // SetGauge sets a gauge metric to the specified value. - SetGauge(key string, value int64, args ...string) - - // MeasureSince measures the time since the provided start time, - // identified by the provided keys. - MeasureSince(key string, start time.Time, args ...string) -} - //nolint:revive // its ok type BlockchainI interface { ProcessGenesisData( diff --git a/beacon/blockchain/metrics.go b/beacon/blockchain/metrics.go index 0cb567a29c..526754053b 100644 --- a/beacon/blockchain/metrics.go +++ b/beacon/blockchain/metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. @@ -23,95 +23,129 @@ package blockchain import ( "time" + "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" + "github.com/prometheus/client_golang/prometheus" ) -// chainMetrics is a struct that contains metrics for the chain. -type chainMetrics struct { - // sink is the sink for the metrics. - sink TelemetrySink +// Metrics is a struct that contains metrics for the blockchain service. 
+type Metrics struct { + StateTransitionDuration metrics.Histogram + RebuildPayloadForRejectedBlockSuccess metrics.Counter + RebuildPayloadForRejectedBlockFailure metrics.Counter + OptimisticPayloadBuildSuccess metrics.Counter + OptimisticPayloadBuildFailure metrics.Counter + StateRootVerificationDuration metrics.Histogram + FailedToGetBlockLogs metrics.Counter + FailedToEnqueueDeposits metrics.Counter } -// newChainMetrics creates a new chainMetrics. -func newChainMetrics( - sink TelemetrySink, -) *chainMetrics { - return &chainMetrics{ - sink: sink, +// NewMetrics returns a new Metrics instance with metrics created by the provided factory. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. +func NewMetrics(factory metrics.Factory) *Metrics { + return &Metrics{ + StateTransitionDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "beacon_blockchain", + Name: "state_transition_duration", + Help: "Time taken to process state transition in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + RebuildPayloadForRejectedBlockSuccess: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blockchain", + Name: "rebuild_payload_for_rejected_block_success", + Help: "Number of successful payload rebuilds for rejected blocks", + }, + []string{"slot"}, + ), + RebuildPayloadForRejectedBlockFailure: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blockchain", + Name: "rebuild_payload_for_rejected_block_failure", + Help: "Number of failed payload rebuilds for rejected blocks", + }, + []string{"slot", "error"}, + ), + OptimisticPayloadBuildSuccess: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blockchain", + Name: "optimistic_payload_build_success", + Help: "Number of successful optimistic payload builds", + }, + []string{"slot"}, + ), + OptimisticPayloadBuildFailure: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blockchain", + Name: 
"optimistic_payload_build_failure", + Help: "Number of failed optimistic payload builds", + }, + []string{"slot", "error"}, + ), + StateRootVerificationDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "blockchain", + Name: "state_root_verification_duration", + Help: "Time taken to verify state root in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + FailedToGetBlockLogs: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_deposit", + Name: "failed_to_get_block_logs", + Help: "Number of times failed to read deposits from execution layer block logs", + }, + []string{"block_num"}, + ), + FailedToEnqueueDeposits: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_deposit", + Name: "failed_to_enqueue_deposits", + Help: "Number of times failed to enqueue deposits to storage", + }, + []string{"block_num"}, + ), } } -// measureStateTransitionDuration measures the time to process -// the state transition for a block. -func (cm *chainMetrics) measureStateTransitionDuration( - start time.Time, -) { - cm.sink.MeasureSince( - "beacon_kit.beacon.blockchain.state_transition_duration", - start, - ) +// measureStateTransitionDuration measures the time to process the state transition for a block. +func (m *Metrics) measureStateTransitionDuration(start time.Time) { + m.StateTransitionDuration.Observe(time.Since(start).Seconds()) } -// markRebuildPayloadForRejectedBlockSuccess increments the counter for the -// number of times +// markRebuildPayloadForRejectedBlockSuccess increments the counter for the number of times // the validator successfully rebuilt the payload for a rejected block. 
-func (cm *chainMetrics) markRebuildPayloadForRejectedBlockSuccess( - slot math.Slot, -) { - cm.sink.IncrementCounter( - "beacon_kit.blockchain.rebuild_payload_for_rejected_block_success", - "slot", - slot.Base10(), - ) +func (m *Metrics) markRebuildPayloadForRejectedBlockSuccess(slot math.Slot) { + m.RebuildPayloadForRejectedBlockSuccess.With("slot", slot.Base10()).Add(1) } // markRebuildPayloadForRejectedBlockFailure increments the counter for the // number of times the validator failed to build an optimistic payload // due to a failure. -func (cm *chainMetrics) markRebuildPayloadForRejectedBlockFailure( - slot math.Slot, - err error, -) { - cm.sink.IncrementCounter( - "beacon_kit.blockchain.rebuild_payload_for_rejected_block_failure", - "slot", - slot.Base10(), - "error", - err.Error(), - ) +func (m *Metrics) markRebuildPayloadForRejectedBlockFailure(slot math.Slot, err error) { + m.RebuildPayloadForRejectedBlockFailure.With("slot", slot.Base10(), "error", err.Error()).Add(1) } // markOptimisticPayloadBuildSuccess increments the counter for the number of // times the validator successfully built an optimistic payload. -func (cm *chainMetrics) markOptimisticPayloadBuildSuccess(slot math.Slot) { - cm.sink.IncrementCounter( - "beacon_kit.blockchain.optimistic_payload_build_success", - "slot", - slot.Base10(), - ) +func (m *Metrics) markOptimisticPayloadBuildSuccess(slot math.Slot) { + m.OptimisticPayloadBuildSuccess.With("slot", slot.Base10()).Add(1) } // markOptimisticPayloadBuildFailure increments the counter for the number of // times the validator failed to build an optimistic payload. 
-func (cm *chainMetrics) markOptimisticPayloadBuildFailure( - slot math.Slot, - err error, -) { - cm.sink.IncrementCounter( - "beacon_kit.blockchain.optimistic_payload_build_failure", - "slot", - slot.Base10(), - "error", - err.Error(), - ) +func (m *Metrics) markOptimisticPayloadBuildFailure(slot math.Slot, err error) { + m.OptimisticPayloadBuildFailure.With("slot", slot.Base10(), "error", err.Error()).Add(1) } // TODO: remove once state caching is activated -// measureStateRootVerificationTime measures the time taken to verify the state -// root of a block. +// measureStateRootVerificationTime measures the time taken to verify the state root of a block. // It records the duration from the provided start time to the current time. -func (cm *chainMetrics) measureStateRootVerificationTime(start time.Time) { - cm.sink.MeasureSince( - "beacon_kit.blockchain.state_root_verification_duration", start, - ) +func (m *Metrics) measureStateRootVerificationTime(start time.Time) { + m.StateRootVerificationDuration.Observe(time.Since(start).Seconds()) } diff --git a/beacon/blockchain/payload_test.go b/beacon/blockchain/payload_test.go index 913f8e1bbc..7a036691c1 100644 --- a/beacon/blockchain/payload_test.go +++ b/beacon/blockchain/payload_test.go @@ -38,7 +38,7 @@ import ( engineprimitives "github.com/berachain/beacon-kit/engine-primitives/engine-primitives" "github.com/berachain/beacon-kit/errors" gethprimitives "github.com/berachain/beacon-kit/geth-primitives" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/payload/builder" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/constants" @@ -180,7 +180,7 @@ func TestOptimisticBlockBuildingVerifiedBlockStateChecks(t *testing.T) { // BUILD A VALID BLOCK (without polluting state st) sdkCtx := sdk.NewContext(cms.CacheMultiStore(), true, log.NewNopLogger()) buildState := 
state.NewBeaconStateFromDB( - st.KVStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), metrics.NewNoOpTelemetrySink(), + st.KVStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), state.NewMetrics(discard.NewFactory()), ) nextBlkTimestamp := math.U64(cs.GenesisTime() + 1) @@ -268,7 +268,7 @@ func setupOptimisticPayloadTests(t *testing.T, cs chain.Spec) ( sp, st, depStore, ctx, cms, eng := statetransition.SetupTestState(t, cs) logger := log.NewNopLogger() - ts := metrics.NewNoOpTelemetrySink() + ts := blockchain.NewMetrics(discard.NewFactory()) sb := bcmocks.NewStorageBackend(t) b := bcmocks.NewLocalBuilder(t) diff --git a/beacon/blockchain/service.go b/beacon/blockchain/service.go index 83a20ce5f4..27c226cd20 100644 --- a/beacon/blockchain/service.go +++ b/beacon/blockchain/service.go @@ -62,7 +62,7 @@ type Service struct { // stateProcessor is the state processor for beacon blocks and states. stateProcessor StateProcessor // metrics is the metrics for the service. - metrics *chainMetrics + metrics *Metrics // forceStartupSyncOnce is used to force a sync of the startup head. 
forceStartupSyncOnce *sync.Once @@ -83,7 +83,7 @@ func NewService( executionEngine ExecutionEngine, localBuilder LocalBuilder, stateProcessor StateProcessor, - telemetrySink TelemetrySink, + metrics *Metrics, ) *Service { return &Service{ storageBackend: storageBackend, @@ -97,7 +97,7 @@ func NewService( executionEngine: executionEngine, localBuilder: localBuilder, stateProcessor: stateProcessor, - metrics: newChainMetrics(telemetrySink), + metrics: metrics, forceStartupSyncOnce: new(sync.Once), } } diff --git a/beacon/validator/interfaces.go b/beacon/validator/interfaces.go index b15e4777b2..4985f3b911 100644 --- a/beacon/validator/interfaces.go +++ b/beacon/validator/interfaces.go @@ -22,7 +22,6 @@ package validator import ( "context" - "time" ctypes "github.com/berachain/beacon-kit/consensus-types/types" "github.com/berachain/beacon-kit/consensus/types" @@ -94,16 +93,6 @@ type StorageBackend interface { StateFromContext(context.Context) *statedb.StateDB } -// TelemetrySink is an interface for sending metrics to a telemetry backend. -type TelemetrySink interface { - // IncrementCounter increments a counter metric identified by the provided - // keys. - IncrementCounter(key string, args ...string) - // MeasureSince measures the time since the provided start time, - // identified by the provided keys. - MeasureSince(key string, start time.Time, args ...string) -} - type BlockBuilderI interface { BuildBlockAndSidecars( context.Context, diff --git a/beacon/validator/metrics.go b/beacon/validator/metrics.go index 9f9a4baf94..1dc606a831 100644 --- a/beacon/validator/metrics.go +++ b/beacon/validator/metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. 
LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. @@ -23,54 +23,68 @@ package validator import ( "time" + "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" + "github.com/prometheus/client_golang/prometheus" ) -// validatorMetrics is a struct that contains metrics for the chain. -type validatorMetrics struct { - // sink is the sink for the metrics. - sink TelemetrySink +// Metrics is a struct that contains metrics for the validator service. +type Metrics struct { + // RequestBlockForProposalDuration tracks time to request block for proposal + RequestBlockForProposalDuration metrics.Histogram + + // StateRootComputationDuration tracks time to compute state root + StateRootComputationDuration metrics.Histogram + + // FailedToRetrievePayload tracks failed payload retrievals + FailedToRetrievePayload metrics.Counter } -// newValidatorMetrics creates a new validatorMetrics. -func newValidatorMetrics( - sink TelemetrySink, -) *validatorMetrics { - return &validatorMetrics{ - sink: sink, +// NewMetrics returns a new Metrics instance. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
+func NewMetrics(factory metrics.Factory) *Metrics { + return &Metrics{ + RequestBlockForProposalDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "validator", + Name: "request_block_for_proposal_duration", + Help: "Time taken to request block for proposal in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + StateRootComputationDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "validator", + Name: "state_root_computation_duration", + Help: "Time taken to compute state root in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + FailedToRetrievePayload: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "validator", + Name: "failed_to_retrieve_payload", + Help: "Number of times validator failed to retrieve payload", + }, + []string{"slot", "error"}, + ), } } -// measureRequestBlockForProposalTime measures the time taken to run the request -// best -// block function. -func (cm *validatorMetrics) measureRequestBlockForProposalTime( - start time.Time, -) { - cm.sink.MeasureSince( - "beacon_kit.validator.request_block_for_proposal_duration", start, - ) +// measureRequestBlockForProposalTime measures the time taken to request block for proposal. +func (m *Metrics) measureRequestBlockForProposalTime(start time.Time) { + m.RequestBlockForProposalDuration.Observe(time.Since(start).Seconds()) } -// measureStateRootComputationTime measures the time taken to compute the state -// root of a block. -// It records the duration from the provided start time to the current time. -func (cm *validatorMetrics) measureStateRootComputationTime(start time.Time) { - cm.sink.MeasureSince( - "beacon_kit.validator.state_root_computation_duration", start, - ) +// measureStateRootComputationTime measures the time taken to compute the state root of a block. 
+func (m *Metrics) measureStateRootComputationTime(start time.Time) { + m.StateRootComputationDuration.Observe(time.Since(start).Seconds()) } -// failedToRetrievePayload increments the counter for the number of -// times the validator failed to retrieve payloads. -func (cm *validatorMetrics) failedToRetrievePayload( - slot math.Slot, err error, -) { - cm.sink.IncrementCounter( - "beacon_kit.validator.failed_to_retrieve_payload", - "slot", - slot.Base10(), - "error", - err.Error(), - ) +// failedToRetrievePayload increments the counter for the number of times the validator +// failed to retrieve payloads. +func (m *Metrics) failedToRetrievePayload(slot math.Slot, err error) { + m.FailedToRetrievePayload.With("slot", slot.Base10(), "error", err.Error()).Add(1) } diff --git a/beacon/validator/service.go b/beacon/validator/service.go index 190afc915f..fa4b18bab7 100644 --- a/beacon/validator/service.go +++ b/beacon/validator/service.go @@ -49,7 +49,7 @@ type Service struct { // The local Builder. localPayloadBuilder PayloadBuilder // metrics is a metrics collector. - metrics *validatorMetrics + metrics *Metrics } // NewService creates a new validator service. 
@@ -62,7 +62,7 @@ func NewService( signer crypto.BLSSigner, blobFactory BlobFactory, localPayloadBuilder PayloadBuilder, - ts TelemetrySink, + metrics *Metrics, ) *Service { return &Service{ cfg: cfg, @@ -73,7 +73,7 @@ func NewService( stateProcessor: stateProcessor, blobFactory: blobFactory, localPayloadBuilder: localPayloadBuilder, - metrics: newValidatorMetrics(ts), + metrics: metrics, } } diff --git a/cmd/beacond/defaults.go b/cmd/beacond/defaults.go index bb2fea0324..582483cc02 100644 --- a/cmd/beacond/defaults.go +++ b/cmd/beacond/defaults.go @@ -51,12 +51,12 @@ func DefaultComponents() []any { components.ProvideStateProcessor, components.ProvideKVStore, components.ProvideStorageBackend, - components.ProvideTelemetrySink, - components.ProvideTelemetryService, components.ProvideTrustedSetup, components.ProvideValidatorService, components.ProvideNodeAPIServer, components.ProvideShutDownService, } + // Add all metrics providers (factory + individual metrics) + c = append(c, components.AllMetricsProviders()...) return c } diff --git a/config/config/config.go b/config/config/config.go index 43f63b3cb2..311eabdbbc 100644 --- a/config/config/config.go +++ b/config/config/config.go @@ -24,7 +24,6 @@ import ( "fmt" pruningtypes "cosmossdk.io/store/pruning/types" - "github.com/cosmos/cosmos-sdk/telemetry" "github.com/spf13/viper" ) @@ -77,12 +76,20 @@ type BaseConfig struct { IAVLDisableFastNode bool `mapstructure:"iavl-disable-fastnode"` } +// TelemetryConfig defines the telemetry configuration for beacon-kit metrics. +type TelemetryConfig struct { + // Enabled enables Prometheus metrics collection. + // When true, all beacon-kit services emit Prometheus metrics. + // When false, all metrics are no-ops with negligible runtime overhead. + Enabled bool `mapstructure:"enabled"` +} + // Config defines the server's top level configuration.
type Config struct { BaseConfig `mapstructure:",squash"` // Telemetry defines the application telemetry configuration - Telemetry telemetry.Config `mapstructure:"telemetry"` + Telemetry TelemetryConfig `mapstructure:"telemetry"` } // DefaultConfig returns server's default configuration. @@ -98,9 +105,8 @@ func DefaultConfig() *Config { IAVLCacheSize: 5000, IAVLDisableFastNode: false, }, - Telemetry: telemetry.Config{ - Enabled: false, - GlobalLabels: [][]string{}, + Telemetry: TelemetryConfig{ + Enabled: false, }, } } diff --git a/config/config/config.toml.tpl b/config/config/config.toml.tpl index 1e8415370d..dbe5793cc6 100644 --- a/config/config/config.toml.tpl +++ b/config/config/config.toml.tpl @@ -61,42 +61,8 @@ iavl-disable-fastnode = {{ .BaseConfig.IAVLDisableFastNode }} [telemetry] -# Prefixed with keys to separate services. -service-name = "{{ .Telemetry.ServiceName }}" - -# Enabled enables the application telemetry functionality. When enabled, -# an in-memory sink is also enabled by default. Operators may also enabled -# other sinks such as Prometheus. +# Enabled enables Prometheus metrics collection for all beacon-kit services. +# When true, metrics are exposed at the configured Prometheus endpoint. +# When false, all metrics are no-ops with negligible runtime overhead. +# Default: false enabled = {{ .Telemetry.Enabled }} - -# Enable prefixing gauge values with hostname. -enable-hostname = {{ .Telemetry.EnableHostname }} - -# Enable adding hostname to labels. -enable-hostname-label = {{ .Telemetry.EnableHostnameLabel }} - -# Enable adding service to labels. -enable-service-label = {{ .Telemetry.EnableServiceLabel }} - -# PrometheusRetentionTime, when positive, enables a Prometheus metrics sink. -prometheus-retention-time = {{ .Telemetry.PrometheusRetentionTime }} - -# GlobalLabels defines a global set of name/value label tuples applied to all -# metrics emitted using the wrapper functions defined in telemetry package.
-# -# Example: -# [["chain_id", "cosmoshub-1"]] -global-labels = [{{ range $k, $v := .Telemetry.GlobalLabels }} - ["{{index $v 0 }}", "{{ index $v 1}}"],{{ end }} -] - -# MetricsSink defines the type of metrics sink to use. -metrics-sink = "{{ .Telemetry.MetricsSink }}" - -# StatsdAddr defines the address of a statsd server to send metrics to. -# Only utilized if MetricsSink is set to "statsd" or "dogstatsd". -statsd-addr = "{{ .Telemetry.StatsdAddr }}" - -# DatadogHostname defines the hostname to use when emitting metrics to -# Datadog. Only utilized if MetricsSink is set to "dogstatsd". -datadog-hostname = "{{ .Telemetry.DatadogHostname }}" \ No newline at end of file diff --git a/consensus/cometbft/service/abci.go b/consensus/cometbft/service/abci.go index 7a8f2ea26e..45257b1acd 100644 --- a/consensus/cometbft/service/abci.go +++ b/consensus/cometbft/service/abci.go @@ -160,11 +160,9 @@ func (s *Service) Query( req.Height = s.lastBlockHeight() } - s.telemetrySink.IncrementCounter("beacon_kit.comet.query_count", "path", req.Path) + s.metrics.QueryCount.With("path", req.Path).Add(1) startTime := time.Now() - defer s.telemetrySink.MeasureSince( - "beacon_kit.comet.query_duration", startTime, "path", req.Path, - ) + defer s.metrics.measureQueryDuration(startTime, req.Path) path := splitABCIQueryPath(req.Path) if len(path) == 0 { diff --git a/consensus/cometbft/service/interfaces.go b/consensus/cometbft/service/interfaces.go index 8d4a430c0e..454322b2f4 100644 --- a/consensus/cometbft/service/interfaces.go +++ b/consensus/cometbft/service/interfaces.go @@ -21,8 +21,6 @@ package cometbft import ( - "time" - "github.com/cometbft/cometbft/p2p" ) @@ -32,11 +30,3 @@ type BlobReactorI interface { // SetNodeKey sets the node Key for the reactor. SetNodeKey(nodeKey string) } - -// TelemetrySink is an interface for sending metrics to a telemetry backend. -type TelemetrySink interface { - // IncrementCounter increments a counter for the given key. 
- IncrementCounter(key string, args ...string) - // MeasureSince measures the time since the given time. - MeasureSince(key string, start time.Time, args ...string) -} diff --git a/consensus/cometbft/service/metrics.go b/consensus/cometbft/service/metrics.go new file mode 100644 index 0000000000..9f9c5467cd --- /dev/null +++ b/consensus/cometbft/service/metrics.go @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: BUSL-1.1 +// +// Copyright (C) 2025, Berachain Foundation. All rights reserved. +// Use of this software is governed by the Business Source License included +// in the LICENSE file of this repository and at www.mariadb.com/bsl11. +// +// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY +// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER +// VERSIONS OF THE LICENSED WORK. +// +// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF +// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF +// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). +// +// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +// TITLE. + +package cometbft + +import ( + "time" + + "github.com/berachain/beacon-kit/observability/metrics" + prominternal "github.com/prometheus/client_golang/prometheus" +) + +// Metrics holds metrics for the CometBFT service. +// +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. +type Metrics struct { + // QueryCount tracks the number of ABCI queries received, labeled by query path. + QueryCount metrics.Counter + + // QueryDuration tracks the time taken to process ABCI queries, labeled by query path. 
+ QueryDuration metrics.Histogram + + // PrepareProposalDuration tracks the time taken to prepare a proposal. + PrepareProposalDuration metrics.Histogram + + // ProcessProposalDuration tracks the time taken to process a proposal. + ProcessProposalDuration metrics.Histogram +} + +// NewMetrics creates a new Metrics instance using the provided factory. +// The factory determines whether real Prometheus metrics or no-op metrics are created. +// +//nolint:mnd // magic numbers are histogram bucket ranges for timing metrics +func NewMetrics(factory metrics.Factory) *Metrics { + return &Metrics{ + QueryCount: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "comet", + Name: "query_count", + Help: "Total number of ABCI queries received", + }, + []string{"path"}, + ), + QueryDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "comet", + Name: "query_duration", + Help: "Time taken to process ABCI queries in seconds", + Buckets: prominternal.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"path"}, + ), + PrepareProposalDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "runtime", + Name: "prepare_proposal_duration", + Help: "Time taken to prepare a proposal in seconds", + Buckets: prominternal.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + ProcessProposalDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "runtime", + Name: "process_proposal_duration", + Help: "Time taken to process a proposal in seconds", + Buckets: prominternal.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + } +} + +// measureQueryDuration is a helper to measure query duration. +func (m *Metrics) measureQueryDuration(start time.Time, path string) { + m.QueryDuration.With("path", path).Observe(time.Since(start).Seconds()) +} + +// measurePrepareProposalDuration is a helper to measure prepare proposal duration. 
+func (m *Metrics) measurePrepareProposalDuration(start time.Time) { + m.PrepareProposalDuration.Observe(time.Since(start).Seconds()) +} + +// measureProcessProposalDuration is a helper to measure process proposal duration. +func (m *Metrics) measureProcessProposalDuration(start time.Time) { + m.ProcessProposalDuration.Observe(time.Since(start).Seconds()) +} diff --git a/consensus/cometbft/service/prepare_proposal.go b/consensus/cometbft/service/prepare_proposal.go index cf98c23285..0836ea6319 100644 --- a/consensus/cometbft/service/prepare_proposal.go +++ b/consensus/cometbft/service/prepare_proposal.go @@ -35,8 +35,7 @@ func (s *Service) prepareProposal( req *cmtabci.PrepareProposalRequest, ) (*cmtabci.PrepareProposalResponse, error) { startTime := time.Now() - defer s.telemetrySink.MeasureSince( - "beacon_kit.runtime.prepare_proposal_duration", startTime) + defer s.metrics.measurePrepareProposalDuration(startTime) // CometBFT must never call PrepareProposal with a height of 0. if req.Height < 1 { diff --git a/consensus/cometbft/service/process_proposal.go b/consensus/cometbft/service/process_proposal.go index bdc5e5b79e..a3e8c18fd5 100644 --- a/consensus/cometbft/service/process_proposal.go +++ b/consensus/cometbft/service/process_proposal.go @@ -35,8 +35,7 @@ func (s *Service) processProposal( req *cmtabci.ProcessProposalRequest, ) (*cmtabci.ProcessProposalResponse, error) { startTime := time.Now() - defer s.telemetrySink.MeasureSince( - "beacon_kit.runtime.process_proposal_duration", startTime) + defer s.metrics.measureProcessProposalDuration(startTime) // CometBFT must never call ProcessProposal with a height of 0. if req.Height < 1 { diff --git a/consensus/cometbft/service/service.go b/consensus/cometbft/service/service.go index eb12b285b2..82ecd7fdf5 100644 --- a/consensus/cometbft/service/service.go +++ b/consensus/cometbft/service/service.go @@ -69,7 +69,7 @@ type Service struct { // Loaded from config file (config.toml), not part of state. 
cmtCfg *cmtcfg.Config - telemetrySink TelemetrySink + metrics *Metrics logger *phuslu.Logger sm *statem.Manager @@ -116,7 +116,7 @@ func NewService( blobReactor BlobReactorI, cs chain.Spec, cmtCfg *cmtcfg.Config, - telemetrySink TelemetrySink, + metrics *Metrics, options ...func(*Service), ) *Service { if err := validateConfig(cmtCfg); err != nil { @@ -141,7 +141,7 @@ func NewService( chainSpec: cs, cmtConsensusParams: cmtConsensusParams, cmtCfg: cmtCfg, - telemetrySink: telemetrySink, + metrics: metrics, cachedStates: cache.New(), } diff --git a/consensus/cometbft/service/state/state.go b/consensus/cometbft/service/state/state.go index 0153beb920..2ec0f37ffa 100644 --- a/consensus/cometbft/service/state/state.go +++ b/consensus/cometbft/service/state/state.go @@ -25,8 +25,8 @@ import ( "cosmossdk.io/log" "cosmossdk.io/store" - storemetrics "cosmossdk.io/store/metrics" storetypes "cosmossdk.io/store/types" + "github.com/berachain/beacon-kit/storage" dbm "github.com/cosmos/cosmos-db" ) @@ -46,7 +46,7 @@ func NewManager( cms: store.NewCommitMultiStore( db, logger, - storemetrics.NewNoOpMetrics(), + storage.NoOpStoreMetrics{}, ), } diff --git a/da/blob/factory.go b/da/blob/factory.go index 033a2eb8d5..53a9631681 100644 --- a/da/blob/factory.go +++ b/da/blob/factory.go @@ -36,15 +36,15 @@ import ( // SidecarFactory is a factory for sidecars. type SidecarFactory struct { // metrics is used to collect and report factory metrics. - metrics *factoryMetrics + metrics *FactoryMetrics } // NewSidecarFactory creates a new sidecar factory. func NewSidecarFactory( - telemetrySink TelemetrySink, + metrics *FactoryMetrics, ) *SidecarFactory { return &SidecarFactory{ - metrics: newFactoryMetrics(telemetrySink), + metrics: metrics, } } diff --git a/da/blob/factory_metrics.go b/da/blob/factory_metrics.go index 06854941cd..e3cb382e48 100644 --- a/da/blob/factory_metrics.go +++ b/da/blob/factory_metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). 
// // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. @@ -23,65 +23,86 @@ package blob import ( "time" + "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" + "github.com/prometheus/client_golang/prometheus" ) -// factoryMetrics is a struct that contains metrics for the factory. -type factoryMetrics struct { - // TelemetrySink is the sink for the metrics. - sink TelemetrySink +// FactoryMetrics is a struct that contains metrics for the sidecar factory. +type FactoryMetrics struct { + BuildSidecarDuration metrics.Histogram + BuildKZGInclusionProofDuration metrics.Histogram + BuildBlockBodyProofDuration metrics.Histogram + BuildCommitmentProofDuration metrics.Histogram } -// newFactoryMetrics creates a new factoryMetrics. -func newFactoryMetrics( - sink TelemetrySink, -) *factoryMetrics { - return &factoryMetrics{ - sink: sink, +// NewFactoryMetrics returns a new FactoryMetrics instance. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
+func NewFactoryMetrics(factory metrics.Factory) *FactoryMetrics { + return &FactoryMetrics{ + BuildSidecarDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_factory", + Name: "build_sidecar_duration", + Help: "Time taken to build blob sidecars in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"num_sidecars"}, + ), + BuildKZGInclusionProofDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_factory", + Name: "build_kzg_inclusion_proof_duration", + Help: "Time taken to build KZG inclusion proof in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + BuildBlockBodyProofDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_factory", + Name: "build_block_body_proof_duration", + Help: "Time taken to build block body proof in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + BuildCommitmentProofDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_factory", + Name: "build_commitment_proof_duration", + Help: "Time taken to build commitment proof in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), } } // measureBuildSidecarsDuration measures the duration of the build sidecars. -func (fm *factoryMetrics) measureBuildSidecarsDuration( +func (m *FactoryMetrics) measureBuildSidecarsDuration( startTime time.Time, numSidecars math.U64, ) { - fm.sink.MeasureSince( - "beacon_kit.da.blob.factory.build_sidecar_duration", - startTime, - "num_sidecars", - numSidecars.Base10(), - ) + m.BuildSidecarDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) } -// measureBuildKZGInclusionProofDuration measures the duration of the build KZG -// inclusion proof. 
-func (fm *factoryMetrics) measureBuildKZGInclusionProofDuration( +// measureBuildKZGInclusionProofDuration measures the duration of the build KZG inclusion proof. +func (m *FactoryMetrics) measureBuildKZGInclusionProofDuration( startTime time.Time, ) { - fm.sink.MeasureSince( - "beacon_kit.da.blob.factory.build_kzg_inclusion_proof_duration", - startTime, - ) + m.BuildKZGInclusionProofDuration.Observe(time.Since(startTime).Seconds()) } -// measureBuildBlockBodyProofDuration measures the duration of the build block -// body proof. -func (fm *factoryMetrics) measureBuildBlockBodyProofDuration( +// measureBuildBlockBodyProofDuration measures the duration of the build block body proof. +func (m *FactoryMetrics) measureBuildBlockBodyProofDuration( startTime time.Time, ) { - fm.sink.MeasureSince( - "beacon_kit.da.blob.factory.build_block_body_proof_duration", - startTime, - ) + m.BuildBlockBodyProofDuration.Observe(time.Since(startTime).Seconds()) } -// measureBuildCommitmentProofDuration measures the duration of the build -// commitment proof. -func (fm *factoryMetrics) measureBuildCommitmentProofDuration( +// measureBuildCommitmentProofDuration measures the duration of the build commitment proof. +func (m *FactoryMetrics) measureBuildCommitmentProofDuration( startTime time.Time, ) { - fm.sink.MeasureSince( - "beacon_kit.da.blob.factory.build_commitment_proof_duration", - startTime, - ) + m.BuildCommitmentProofDuration.Observe(time.Since(startTime).Seconds()) } diff --git a/da/blob/interfaces.go b/da/blob/interfaces.go index 6080825951..903f7ba499 100644 --- a/da/blob/interfaces.go +++ b/da/blob/interfaces.go @@ -19,14 +19,3 @@ // TITLE. package blob - -import ( - "time" -) - -// TelemetrySink is an interface for sending metrics to a telemetry backend. -type TelemetrySink interface { - // MeasureSince measures the time since the provided start time, - // identified by the provided keys. 
- MeasureSince(key string, start time.Time, args ...string) -} diff --git a/da/blob/processor.go b/da/blob/processor.go index 3e6e6c0128..3e2c3debf2 100644 --- a/da/blob/processor.go +++ b/da/blob/processor.go @@ -42,21 +42,22 @@ type Processor struct { // verifier is responsible for verifying the blobs. verifier *verifier // metrics is used to collect and report processor metrics. - metrics *processorMetrics + metrics *ProcessorMetrics } // NewProcessor creates a new blob processor. func NewProcessor( logger log.Logger, proofVerifier kzg.BlobProofVerifier, - telemetrySink TelemetrySink, + processorMetrics *ProcessorMetrics, + verifierMetrics *VerifierMetrics, ) *Processor { - verifier := newVerifier(proofVerifier, telemetrySink) + verifier := newVerifier(proofVerifier, verifierMetrics) return &Processor{ logger: logger, verifier: verifier, - metrics: newProcessorMetrics(telemetrySink), + metrics: processorMetrics, } } diff --git a/da/blob/processor_metrics.go b/da/blob/processor_metrics.go index caedd1bd1b..306b973812 100644 --- a/da/blob/processor_metrics.go +++ b/da/blob/processor_metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. @@ -23,46 +23,54 @@ package blob import ( "time" + "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" + "github.com/prometheus/client_golang/prometheus" ) -// processorMetrics is a struct that contains metrics for the processor. -type processorMetrics struct { - // TelemetrySink is the sink for the metrics. 
- sink TelemetrySink +// ProcessorMetrics is a struct that contains metrics for the blob processor. +type ProcessorMetrics struct { + VerifyBlobsDuration metrics.Histogram + ProcessBlobDuration metrics.Histogram } -// newProcessorMetrics creates a new processorMetrics. -func newProcessorMetrics( - sink TelemetrySink, -) *processorMetrics { - return &processorMetrics{ - sink: sink, +// NewProcessorMetrics returns a new ProcessorMetrics instance. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. +func NewProcessorMetrics(factory metrics.Factory) *ProcessorMetrics { + return &ProcessorMetrics{ + VerifyBlobsDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_processor", + Name: "verify_blobs_duration", + Help: "Time taken to verify blob sidecars in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"num_sidecars"}, + ), + ProcessBlobDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_processor", + Name: "process_blob_duration", + Help: "Time taken to process blob sidecars in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"num_sidecars"}, + ), } } // measureVerifySidecarsDuration measures the duration of the blob verification. -func (pm *processorMetrics) measureVerifySidecarsDuration( +func (m *ProcessorMetrics) measureVerifySidecarsDuration( startTime time.Time, numSidecars math.U64, ) { - pm.sink.MeasureSince( - "beacon_kit.da.blob.processor.verify_blobs_duration", - startTime, - "num_sidecars", - numSidecars.Base10(), - ) + m.VerifyBlobsDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) } // measureProcessSidecarsDuration measures the duration of the blob processing. 
-func (pm *processorMetrics) measureProcessSidecarsDuration( +func (m *ProcessorMetrics) measureProcessSidecarsDuration( startTime time.Time, numSidecars math.U64, ) { - pm.sink.MeasureSince( - "beacon_kit.da.blob.processor.process_blob_duration", - startTime, - "num_sidecars", - numSidecars.Base10(), - ) + m.ProcessBlobDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) } diff --git a/da/blob/verifier.go b/da/blob/verifier.go index 81bd064823..b1c16e6ddb 100644 --- a/da/blob/verifier.go +++ b/da/blob/verifier.go @@ -40,17 +40,17 @@ type verifier struct { // proofVerifier is used to verify the KZG proofs of the blobs. proofVerifier kzg.BlobProofVerifier // metrics collects and reports metrics related to the verification process. - metrics *verifierMetrics + metrics *VerifierMetrics } // newVerifier creates a new Verifier with the given proof verifier. func newVerifier( proofVerifier kzg.BlobProofVerifier, - telemetrySink TelemetrySink, + metrics *VerifierMetrics, ) *verifier { return &verifier{ proofVerifier: proofVerifier, - metrics: newVerifierMetrics(telemetrySink), + metrics: metrics, } } diff --git a/da/blob/verifier_metrics.go b/da/blob/verifier_metrics.go index 9a2d6c5c89..1e09453ffd 100644 --- a/da/blob/verifier_metrics.go +++ b/da/blob/verifier_metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. 
@@ -23,67 +23,80 @@ package blob import ( "time" + "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" + "github.com/prometheus/client_golang/prometheus" ) -// verifierMetrics is a struct that contains metrics for the verifier. -type verifierMetrics struct { - // TelemetrySink is the sink for the metrics. - sink TelemetrySink +// VerifierMetrics is a struct that contains metrics for the blob verifier. +type VerifierMetrics struct { + VerifyBlobsDuration metrics.Histogram + VerifyInclusionProofsDuration metrics.Histogram + VerifyKZGProofsDuration metrics.Histogram } -// newVerifierMetrics creates a new verifierMetrics. -func newVerifierMetrics( - sink TelemetrySink, -) *verifierMetrics { - return &verifierMetrics{ - sink: sink, +// NewVerifierMetrics returns a new VerifierMetrics instance. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. +func NewVerifierMetrics(factory metrics.Factory) *VerifierMetrics { + return &VerifierMetrics{ + VerifyBlobsDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_verifier", + Name: "verify_blobs_duration", + Help: "Time taken to verify blobs in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"num_sidecars", "kzg_implementation"}, + ), + VerifyInclusionProofsDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_verifier", + Name: "verify_inclusion_proofs_duration", + Help: "Time taken to verify inclusion proofs in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"num_sidecars"}, + ), + VerifyKZGProofsDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "da_blob_verifier", + Name: "verify_kzg_proofs_duration", + Help: "Time taken to verify KZG proofs in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"num_sidecars", "kzg_implementation"}, + ), } } // 
measureVerifySidecarsDuration measures the duration of the blob verification. -func (vm *verifierMetrics) measureVerifySidecarsDuration( +func (m *VerifierMetrics) measureVerifySidecarsDuration( startTime time.Time, numSidecars math.U64, kzgImplementation string, ) { - vm.sink.MeasureSince( - "beacon_kit.da.blob.verifier.verify_blobs_duration", - startTime, - "num_sidecars", - numSidecars.Base10(), - "kzg_implementation", - kzgImplementation, - ) + m.VerifyBlobsDuration.With( + "num_sidecars", numSidecars.Base10(), + "kzg_implementation", kzgImplementation, + ).Observe(time.Since(startTime).Seconds()) } -// measureVerifyInclusionProofsDuration measures the duration of the inclusion -// proofs verification. -func (vm *verifierMetrics) measureVerifyInclusionProofsDuration( +// measureVerifyInclusionProofsDuration measures the duration of the inclusion proofs verification. +func (m *VerifierMetrics) measureVerifyInclusionProofsDuration( startTime time.Time, numSidecars math.U64, ) { - vm.sink.MeasureSince( - "beacon_kit.da.blob.verifier.verify_inclusion_proofs_duration", - startTime, - "num_sidecars", - numSidecars.Base10(), - ) + m.VerifyInclusionProofsDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) } -// measureVerifyKZGProofsDuration measures the duration of the KZG proofs -// verification. -func (vm *verifierMetrics) measureVerifyKZGProofsDuration( +// measureVerifyKZGProofsDuration measures the duration of the KZG proofs verification. 
+func (m *VerifierMetrics) measureVerifyKZGProofsDuration( startTime time.Time, numSidecars math.U64, kzgImplementation string, ) { - vm.sink.MeasureSince( - "beacon_kit.da.blob.verifier.verify_kzg_proofs_duration", - startTime, - "num_sidecars", - numSidecars.Base10(), - "kzg_implementation", - kzgImplementation, - ) + m.VerifyKZGProofsDuration.With( + "num_sidecars", numSidecars.Base10(), + "kzg_implementation", kzgImplementation, + ).Observe(time.Since(startTime).Seconds()) } diff --git a/da/blobreactor/interfaces.go b/da/blobreactor/interfaces.go index 60f1b4628d..6704e025ce 100644 --- a/da/blobreactor/interfaces.go +++ b/da/blobreactor/interfaces.go @@ -20,8 +20,6 @@ package blobreactor -import "time" - // BlobStore is a minimal interface for the BlobReactor to check and serve blobs. // This matches the IndexDB interface from the AvailabilityStore. type BlobStore interface { @@ -31,15 +29,3 @@ type BlobStore interface { // GetByIndex retrieves all raw blob data for a given index (slot). GetByIndex(index uint64) ([][]byte, error) } - -// TelemetrySink is an interface for emitting metrics. -type TelemetrySink interface { - // IncrementCounter increments a counter metric identified by the provided keys. - IncrementCounter(key string, args ...string) - - // SetGauge sets a gauge metric to the specified value. - SetGauge(key string, value int64, args ...string) - - // MeasureSince measures the time since the provided start time. - MeasureSince(key string, start time.Time, args ...string) -} diff --git a/da/blobreactor/metrics.go b/da/blobreactor/metrics.go index 7b1bc8b3cc..19874c6bd6 100644 --- a/da/blobreactor/metrics.go +++ b/da/blobreactor/metrics.go @@ -22,6 +22,9 @@ package blobreactor import ( "time" + + "github.com/berachain/beacon-kit/observability/metrics" + "github.com/prometheus/client_golang/prometheus" ) // Metric status constants for blob reactor requests. 
@@ -38,39 +41,104 @@ const ( messageTypeResponse = "response" ) -// blobReactorMetrics contains metrics for the blob reactor P2P operations. -type blobReactorMetrics struct { - sink TelemetrySink +// Metrics contains metrics for the blob reactor P2P operations. +type Metrics struct { + RequestTotal metrics.Counter + RequestDuration metrics.Histogram + PeerAttemptsTotal metrics.Counter + WorkerPoolFullTotal metrics.Counter + ActiveRequests metrics.Gauge + PeersAvailable metrics.Gauge + PeersTotal metrics.Gauge } -// newBlobReactorMetrics creates a new blobReactorMetrics instance. -func newBlobReactorMetrics(sink TelemetrySink) *blobReactorMetrics { - return &blobReactorMetrics{sink: sink} +// NewMetrics returns a new Metrics instance with Prometheus metrics. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. +func NewMetrics(factory metrics.Factory) *Metrics { + return &Metrics{ + RequestTotal: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blobreactor", + Name: "request_total", + Help: "Total number of blob requests completed", + }, + []string{"status"}, + ), + RequestDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "blobreactor", + Name: "request_duration", + Help: "Time taken to complete blob requests in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + []string{"status"}, + ), + PeerAttemptsTotal: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blobreactor", + Name: "peer_attempts_total", + Help: "Total number of peer attempts for blob requests", + }, + []string{"status"}, + ), + WorkerPoolFullTotal: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "blobreactor", + Name: "worker_pool_full_total", + Help: "Number of times worker pool was full and messages were dropped", + }, + []string{"message_type"}, + ), + ActiveRequests: factory.NewGauge( + metrics.GaugeOpts{ + Subsystem: "blobreactor", + Name: "active_requests", + Help: "Number of currently 
active blob requests", + }, + nil, + ), + PeersAvailable: factory.NewGauge( + metrics.GaugeOpts{ + Subsystem: "blobreactor", + Name: "peers_available", + Help: "Number of available peers for blob requests", + }, + nil, + ), + PeersTotal: factory.NewGauge( + metrics.GaugeOpts{ + Subsystem: "blobreactor", + Name: "peers_total", + Help: "Total number of connected peers", + }, + nil, + ), + } } // recordOverallRequestComplete records completion of entire blob request (may try multiple peers). -func (m *blobReactorMetrics) recordOverallRequestComplete(status string, start time.Time) { - m.sink.IncrementCounter("beacon_kit.blobreactor.request_total", "status", status) - m.sink.MeasureSince("beacon_kit.blobreactor.request_duration", start, "status", status) +func (m *Metrics) recordOverallRequestComplete(status string, start time.Time) { + m.RequestTotal.With("status", status).Add(1) + m.RequestDuration.With("status", status).Observe(time.Since(start).Seconds()) } // recordPeerAttempt records a single peer attempt with status (no duration to avoid high cardinality). -func (m *blobReactorMetrics) recordPeerAttempt(status string) { - m.sink.IncrementCounter("beacon_kit.blobreactor.peer_attempts_total", "status", status) +func (m *Metrics) recordPeerAttempt(status string) { + m.PeerAttemptsTotal.With("status", status).Add(1) } // observeWorkerPoolFull increments counter when worker pool is full and messages are dropped. -func (m *blobReactorMetrics) observeWorkerPoolFull(messageType string) { - m.sink.IncrementCounter("beacon_kit.blobreactor.worker_pool_full_total", "message_type", messageType) +func (m *Metrics) observeWorkerPoolFull(messageType string) { + m.WorkerPoolFullTotal.With("message_type", messageType).Add(1) } // setActiveRequests sets gauge for currently active blob requests. 
-func (m *blobReactorMetrics) setActiveRequests(count int) { - m.sink.SetGauge("beacon_kit.blobreactor.active_requests", int64(count)) +func (m *Metrics) setActiveRequests(count int) { + m.ActiveRequests.Set(float64(count)) } // setPeerPoolSize sets gauges for peer pool statistics. -func (m *blobReactorMetrics) setPeerPoolSize(available, total int) { - m.sink.SetGauge("beacon_kit.blobreactor.peers_available", int64(available)) - m.sink.SetGauge("beacon_kit.blobreactor.peers_total", int64(total)) +func (m *Metrics) setPeerPoolSize(available, total int) { + m.PeersAvailable.Set(float64(available)) + m.PeersTotal.Set(float64(total)) } diff --git a/da/blobreactor/reactor.go b/da/blobreactor/reactor.go index 440211f78e..9202e977ff 100644 --- a/da/blobreactor/reactor.go +++ b/da/blobreactor/reactor.go @@ -84,7 +84,7 @@ type BlobReactor struct { blobStore BlobStore // Storage backend for checking which blobs exist locally logger log.Logger // Logger for the reactor config Config // Config for the reactor - metrics *blobReactorMetrics + metrics *Metrics // Track peers and our head slot stateMu sync.RWMutex // Protects peers and headSlot @@ -108,13 +108,13 @@ type BlobReactor struct { } // NewBlobReactor creates a new blob reactor with storage backend -func NewBlobReactor(blobStore BlobStore, logger log.Logger, cfg Config, sink TelemetrySink) *BlobReactor { +func NewBlobReactor(blobStore BlobStore, logger log.Logger, cfg Config, metrics *Metrics) *BlobReactor { br := &BlobReactor{ peers: make(map[p2p.ID]struct{}), blobStore: blobStore, logger: logger, config: cfg, - metrics: newBlobReactorMetrics(sink), + metrics: metrics, responseChans: make(map[uint64]chan *BlobResponse), requestWorkers: make(chan struct{}, defaultMaxRequestWorkers), } diff --git a/da/blobreactor/reactor_test.go b/da/blobreactor/reactor_test.go index e78678ea4e..3ae9f781ce 100644 --- a/da/blobreactor/reactor_test.go +++ b/da/blobreactor/reactor_test.go @@ -30,6 +30,7 @@ import ( ctypes 
"github.com/berachain/beacon-kit/consensus-types/types" "github.com/berachain/beacon-kit/da/blobreactor" datypes "github.com/berachain/beacon-kit/da/types" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/math" "github.com/cometbft/cometbft/config" @@ -105,16 +106,10 @@ func createTestSidecars(t *testing.T, count int) datypes.BlobSidecars { func newTestReactor(t *testing.T, store blobreactor.BlobStore, config blobreactor.Config) *blobreactor.BlobReactor { t.Helper() logger := log.NewTestLogger(t) - reactor := blobreactor.NewBlobReactor(store, logger, config, noOpTelemetrySink{}) + reactor := blobreactor.NewBlobReactor(store, logger, config, blobreactor.NewMetrics(discard.NewFactory())) return reactor } -type noOpTelemetrySink struct{} - -func (noOpTelemetrySink) IncrementCounter(string, ...string) {} -func (noOpTelemetrySink) SetGauge(string, int64, ...string) {} -func (noOpTelemetrySink) MeasureSince(string, time.Time, ...string) {} - func makeTestP2PConfig(t *testing.T) *config.P2PConfig { t.Helper() p2pConfig := config.DefaultP2PConfig() diff --git a/da/types/sidecar_test.go b/da/types/sidecar_test.go index 3676d5699b..2163c5fc98 100644 --- a/da/types/sidecar_test.go +++ b/da/types/sidecar_test.go @@ -23,11 +23,11 @@ package types_test import ( "strconv" "testing" - "time" ctypes "github.com/berachain/beacon-kit/consensus-types/types" "github.com/berachain/beacon-kit/da/blob" "github.com/berachain/beacon-kit/da/types" + "github.com/berachain/beacon-kit/observability/metrics/discard" byteslib "github.com/berachain/beacon-kit/primitives/bytes" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/crypto" @@ -86,14 +86,8 @@ func TestSidecarMarshalling(t *testing.T) { ) } -type InclusionSink struct{} - -func (is InclusionSink) MeasureSince(_ string, _ time.Time, _ ...string) {} - func TestHasValidInclusionProof(t 
*testing.T) { t.Parallel() - - sink := InclusionSink{} tests := []struct { name string sidecars func(t *testing.T) types.BlobSidecars @@ -149,7 +143,7 @@ func TestHasValidInclusionProof(t *testing.T) { t.Helper() block := utils.GenerateValidBeaconBlock(t, version.Electra()) - sidecarFactory := blob.NewSidecarFactory(sink) + sidecarFactory := blob.NewSidecarFactory(blob.NewFactoryMetrics(discard.NewFactory())) numBlobs := len(block.GetBody().GetBlobKzgCommitments()) sidecars := make(types.BlobSidecars, numBlobs) for i := range numBlobs { diff --git a/execution/client/client.go b/execution/client/client.go index 67dc0a4b4c..789929b788 100644 --- a/execution/client/client.go +++ b/execution/client/client.go @@ -44,8 +44,8 @@ type EngineClient struct { logger log.Logger // eth1ChainID is the chain ID of the execution client. eth1ChainID *big.Int - // clientMetrics is the metrics for the engine client. - metrics *clientMetrics + // metrics is the metrics for the engine client. + metrics *Metrics // capabilities is a map of capabilities that the execution client has. capabilities map[string]struct{} // connected will be set to true when we have successfully connected @@ -61,7 +61,7 @@ func New( cfg *Config, logger log.Logger, jwtSecret *jwt.Secret, - telemetrySink TelemetrySink, + metrics *Metrics, eth1ChainID *big.Int, ) *EngineClient { ethClient := ethclientrpc.NewClient( @@ -96,7 +96,7 @@ func New( Client: ethclient.New(ethClient), capabilities: make(map[string]struct{}), eth1ChainID: eth1ChainID, - metrics: newClientMetrics(telemetrySink, logger), + metrics: metrics, connected: false, } } diff --git a/execution/client/interfaces.go b/execution/client/interfaces.go index 6f2f2ef437..033600a8ce 100644 --- a/execution/client/interfaces.go +++ b/execution/client/interfaces.go @@ -19,17 +19,3 @@ // TITLE. package client - -import ( - "time" -) - -// TelemetrySink is an interface for sending metrics to a telemetry backend. 
-type TelemetrySink interface { - // IncrementCounter increments a counter metric identified by the provided - // keys. - IncrementCounter(key string, args ...string) - // MeasureSince measures the time since the provided start time, - // identified by the provided keys. - MeasureSince(key string, start time.Time, args ...string) -} diff --git a/execution/client/metrics.go b/execution/client/metrics.go index 4d1c96480c..dfb52dcdd4 100644 --- a/execution/client/metrics.go +++ b/execution/client/metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. @@ -24,168 +24,288 @@ import ( "time" "github.com/berachain/beacon-kit/log" + "github.com/berachain/beacon-kit/observability/metrics" + "github.com/prometheus/client_golang/prometheus" ) -// clientMetrics is a struct that contains metrics for the engine. -type clientMetrics struct { - // TelemetrySink is the sink for the metrics. - sink TelemetrySink - // logger is the logger for the engineMetrics. +// Metrics is a struct that contains metrics for the execution client. 
+type Metrics struct { + // Duration histograms + ForkchoiceUpdateDuration metrics.Histogram + NewPayloadDuration metrics.Histogram + GetPayloadDuration metrics.Histogram + + // Timeout counters + EngineAPITimeout metrics.Counter + ForkchoiceUpdateDurationTimeout metrics.Counter + NewPayloadDurationTimeout metrics.Counter + GetPayloadDurationTimeout metrics.Counter + HTTPTimeout metrics.Counter + + // Error counters + ParseError metrics.Counter + InvalidRequest metrics.Counter + MethodNotFound metrics.Counter + InvalidParams metrics.Counter + InternalError metrics.Counter + UnknownPayloadError metrics.Counter + InvalidForkchoiceState metrics.Counter + InvalidPayloadAttributes metrics.Counter + RequestTooLarge metrics.Counter + InternalServerError metrics.Counter + logger log.Logger } -// newClientMetrics creates a new engineMetrics. -func newClientMetrics( - sink TelemetrySink, - logger log.Logger, -) *clientMetrics { - return &clientMetrics{ - sink: sink, +// NewMetrics returns a new Metrics instance with Prometheus metrics. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
+// +//nolint:funlen +func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { + return &Metrics{ + // Duration histograms + ForkchoiceUpdateDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "execution_client", + Name: "forkchoice_update_duration", + Help: "Time taken for forkchoice update in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + NewPayloadDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "execution_client", + Name: "new_payload_duration", + Help: "Time taken for new payload in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + GetPayloadDuration: factory.NewHistogram( + metrics.HistogramOpts{ + Subsystem: "execution_client", + Name: "get_payload_duration", + Help: "Time taken for get payload in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + }, + nil, + ), + + // Timeout counters + EngineAPITimeout: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "engine_api_timeout", + Help: "Number of engine API timeouts", + }, + nil, + ), + ForkchoiceUpdateDurationTimeout: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "forkchoice_update_duration_timeout", + Help: "Number of forkchoice update timeouts", + }, + nil, + ), + NewPayloadDurationTimeout: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "new_payload_duration_timeout", + Help: "Number of new payload timeouts", + }, + nil, + ), + GetPayloadDurationTimeout: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "get_payload_duration_timeout", + Help: "Number of get payload timeouts", + }, + nil, + ), + HTTPTimeout: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "http_timeout", + Help: "Number of HTTP timeouts", + }, + nil, + ), + + // Error counters + ParseError: 
factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "parse_error", + Help: "Number of parse errors", + }, + nil, + ), + InvalidRequest: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "invalid_request", + Help: "Number of invalid requests", + }, + nil, + ), + MethodNotFound: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "method_not_found", + Help: "Number of method not found errors", + }, + nil, + ), + InvalidParams: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "invalid_params", + Help: "Number of invalid params errors", + }, + nil, + ), + InternalError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "internal_error", + Help: "Number of internal errors", + }, + nil, + ), + UnknownPayloadError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "unknown_payload_error", + Help: "Number of unknown payload errors", + }, + nil, + ), + InvalidForkchoiceState: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "invalid_forkchoice_state", + Help: "Number of invalid forkchoice state errors", + }, + nil, + ), + InvalidPayloadAttributes: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "invalid_payload_attributes", + Help: "Number of invalid payload attributes errors", + }, + nil, + ), + RequestTooLarge: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "request_too_large", + Help: "Number of request too large errors", + }, + nil, + ), + InternalServerError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_client", + Name: "internal_server_error", + Help: "Number of internal server errors", + }, + nil, + ), + logger: logger, } } -// measureForkchoiceUpdateDuration measures the duration of the forkchoice -// update. 
-func (cm *clientMetrics) measureForkchoiceUpdateDuration(startTime time.Time) { - // TODO: Add Labels. - cm.sink.MeasureSince( - "beacon_kit.execution.client.forkchoice_update_duration", - startTime, - ) +// measureForkchoiceUpdateDuration measures the duration of the forkchoice update. +func (m *Metrics) measureForkchoiceUpdateDuration(startTime time.Time) { + m.ForkchoiceUpdateDuration.Observe(time.Since(startTime).Seconds()) } // measureNewPayloadDuration measures the duration of the new payload. -func (cm *clientMetrics) measureNewPayloadDuration(startTime time.Time) { - // TODO: Add Labels. - cm.sink.MeasureSince( - "beacon_kit.execution.client.new_payload_duration", - startTime, - ) +func (m *Metrics) measureNewPayloadDuration(startTime time.Time) { + m.NewPayloadDuration.Observe(time.Since(startTime).Seconds()) } // measureGetPayloadDuration measures the duration of the get payload. -func (cm *clientMetrics) measureGetPayloadDuration(startTime time.Time) { - // TODO: Add Labels. - cm.sink.MeasureSince( - "beacon_kit.execution.client.get_payload_duration", - startTime, - ) -} - -// incrementEngineAPITimeout increments the timeout counter for -// general engine api timeouts. -func (cm *clientMetrics) incrementEngineAPITimeout() { - cm.incrementTimeoutCounter( - "beacon_kit.execution.client.engine_api") -} - -// incrementForkchoiceUpdateTimeout increments the timeout counter -// for forkchoice update. -func (cm *clientMetrics) incrementForkchoiceUpdateTimeout() { - cm.incrementTimeoutCounter( - "beacon_kit.execution.client.forkchoice_update_duration") +func (m *Metrics) measureGetPayloadDuration(startTime time.Time) { + m.GetPayloadDuration.Observe(time.Since(startTime).Seconds()) } -// incrementNewPayloadTimeout increments the timeout counter for -// new payload. 
-func (cm *clientMetrics) incrementNewPayloadTimeout() { - cm.incrementTimeoutCounter( - "beacon_kit.execution.client.new_payload_duration") +// incrementEngineAPITimeout increments the timeout counter for general engine api timeouts. +func (m *Metrics) incrementEngineAPITimeout() { + m.EngineAPITimeout.Add(1) } -// incrementGetPayloadTimeout increments the timeout counter for -// get payload. -func (cm *clientMetrics) incrementGetPayloadTimeout() { - cm.incrementTimeoutCounter( - "beacon_kit.execution.client.get_payload_duration") +// incrementForkchoiceUpdateTimeout increments the timeout counter for forkchoice update. +func (m *Metrics) incrementForkchoiceUpdateTimeout() { + m.ForkchoiceUpdateDurationTimeout.Add(1) } -// incrementHTTPTimeout increments the timeout counter for HTTP. -func (cm *clientMetrics) incrementHTTPTimeoutCounter() { - cm.incrementTimeoutCounter("beacon_kit.execution.client.http") +// incrementNewPayloadTimeout increments the timeout counter for new payload. +func (m *Metrics) incrementNewPayloadTimeout() { + m.NewPayloadDurationTimeout.Add(1) } -// incrementTimeoutCounter increments the timeout counter for -// the given metric. -func (cm *clientMetrics) incrementTimeoutCounter(metricName string) { - cm.sink.IncrementCounter(metricName + "_timeout") +// incrementGetPayloadTimeout increments the timeout counter for get payload. +func (m *Metrics) incrementGetPayloadTimeout() { + m.GetPayloadDurationTimeout.Add(1) } -// incrementParseErrorCounter increments the parse error counter -// for the given metric. -func (cm *clientMetrics) incrementParseErrorCounter() { - cm.sink.IncrementCounter("beacon_kit.execution.client.parse_error") +// incrementHTTPTimeoutCounter increments the timeout counter for HTTP. +func (m *Metrics) incrementHTTPTimeoutCounter() { + m.HTTPTimeout.Add(1) } -// incrementInvalidRequestCounter increments the invalid request counter -// for the given metric. 
-func (cm *clientMetrics) incrementInvalidRequestCounter() { - cm.incrementErrorCounter("beacon_kit.execution.client.invalid_request") +// incrementParseErrorCounter increments the parse error counter. +func (m *Metrics) incrementParseErrorCounter() { + m.ParseError.Add(1) } -// incrementMethodNotFoundCounter increments the method not found counter -// for the given metric. -func (cm *clientMetrics) incrementMethodNotFoundCounter() { - cm.incrementErrorCounter("beacon_kit.execution.client.method_not_found") +// incrementInvalidRequestCounter increments the invalid request counter. +func (m *Metrics) incrementInvalidRequestCounter() { + m.InvalidRequest.Add(1) } -// incrementInvalidParamsCounter increments the invalid params counter -// for the given metric. -func (cm *clientMetrics) incrementInvalidParamsCounter() { - cm.incrementErrorCounter("beacon_kit.execution.client.invalid_params") +// incrementMethodNotFoundCounter increments the method not found counter. +func (m *Metrics) incrementMethodNotFoundCounter() { + m.MethodNotFound.Add(1) } -// incrementInternalErrorCounter increments the internal error counter -// for the given metric. -func (cm *clientMetrics) incrementInternalErrorCounter() { - cm.incrementErrorCounter("beacon_kit.execution.client.internal_error") +// incrementInvalidParamsCounter increments the invalid params counter. +func (m *Metrics) incrementInvalidParamsCounter() { + m.InvalidParams.Add(1) } -// incrementUnknownPayloadErrorCounter increments the unknown payload error -// counter -// for the given metric. -func (cm *clientMetrics) incrementUnknownPayloadErrorCounter() { - cm.incrementErrorCounter( - "beacon_kit.execution.client.unknown_payload_error", - ) +// incrementInternalErrorCounter increments the internal error counter. +func (m *Metrics) incrementInternalErrorCounter() { + m.InternalError.Add(1) } -// incrementInvalidForkchoiceStateCounter increments the invalid forkchoice -// state counter -// for the given metric. 
-func (cm *clientMetrics) incrementInvalidForkchoiceStateCounter() { - cm.incrementErrorCounter( - "beacon_kit.execution.client.invalid_forkchoice_state", - ) +// incrementUnknownPayloadErrorCounter increments the unknown payload error counter. +func (m *Metrics) incrementUnknownPayloadErrorCounter() { + m.UnknownPayloadError.Add(1) } -// incrementInvalidPayloadAttributesCounter increments the invalid payload -// attributes counter -// for the given metric. -func (cm *clientMetrics) incrementInvalidPayloadAttributesCounter() { - cm.incrementErrorCounter( - "beacon_kit.execution.client.invalid_payload_attributes", - ) +// incrementInvalidForkchoiceStateCounter increments the invalid forkchoice state counter. +func (m *Metrics) incrementInvalidForkchoiceStateCounter() { + m.InvalidForkchoiceState.Add(1) } -// incrementRequestTooLargeCounter increments the request too large counter -// for the given metric. -func (cm *clientMetrics) incrementRequestTooLargeCounter() { - cm.incrementErrorCounter("beacon_kit.execution.client.request_too_large") +// incrementInvalidPayloadAttributesCounter increments the invalid payload attributes counter. +func (m *Metrics) incrementInvalidPayloadAttributesCounter() { + m.InvalidPayloadAttributes.Add(1) } -// incrementInternalServerErrorCounter increments the internal server error -// counter -// for the given metric. -func (cm *clientMetrics) incrementInternalServerErrorCounter() { - cm.incrementErrorCounter( - "beacon_kit.execution.client.internal_server_error", - ) +// incrementRequestTooLargeCounter increments the request too large counter. +func (m *Metrics) incrementRequestTooLargeCounter() { + m.RequestTooLarge.Add(1) } -// incrementErrorCounter increments the error counter for -// the given metric. -func (cm *clientMetrics) incrementErrorCounter(metricName string) { - cm.sink.IncrementCounter(metricName) +// incrementInternalServerErrorCounter increments the internal server error counter. 
+func (m *Metrics) incrementInternalServerErrorCounter() { + m.InternalServerError.Add(1) } diff --git a/execution/engine/engine.go b/execution/engine/engine.go index 030b32ddb0..56d5752b68 100644 --- a/execution/engine/engine.go +++ b/execution/engine/engine.go @@ -42,19 +42,19 @@ type Engine struct { // logger is the logger for the engine. logger log.Logger // metrics is the metrics for the engine. - metrics *engineMetrics + metrics *Metrics } // New creates a new Engine. func New( engineClient *client.EngineClient, logger log.Logger, - telemtrySink TelemetrySink, + metrics *Metrics, ) *Engine { return &Engine{ ec: engineClient, logger: logger, - metrics: newEngineMetrics(telemtrySink, logger), + metrics: metrics, } } diff --git a/execution/engine/interfaces.go b/execution/engine/interfaces.go index e6082245fb..29dfe597ce 100644 --- a/execution/engine/interfaces.go +++ b/execution/engine/interfaces.go @@ -19,10 +19,3 @@ // TITLE. package engine - -// TelemetrySink is an interface for sending metrics to a telemetry backend. -type TelemetrySink interface { - // IncrementCounter increments a counter metric identified by the provided - // keys. - IncrementCounter(key string, args ...string) -} diff --git a/execution/engine/metrics.go b/execution/engine/metrics.go index 65f8beb8e7..3753958432 100644 --- a/execution/engine/metrics.go +++ b/execution/engine/metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. 
@@ -28,59 +28,195 @@ import ( engineerrors "github.com/berachain/beacon-kit/engine-primitives/errors" "github.com/berachain/beacon-kit/errors" "github.com/berachain/beacon-kit/log" + "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/common" ) -// engineMetrics is a struct that contains metrics for the engine. -type engineMetrics struct { - // TelemetrySink is the sink for the metrics. - sink TelemetrySink - // logger is the logger for the engineMetrics. +// Metrics is a struct that contains metrics for the execution engine. +type Metrics struct { + // New payload metrics + NewPayload metrics.Counter + NewPayloadValid metrics.Counter + NewPayloadAcceptedPayloadStatus metrics.Counter + NewPayloadSyncingPayloadStatus metrics.Counter + NewPayloadInvalidPayloadStatus metrics.Counter + NewPayloadNonFatalError metrics.Counter + NewPayloadFatalError metrics.Counter + NewPayloadUndefinedError metrics.Counter + + // Forkchoice update metrics + ForkchoiceUpdate metrics.Counter + ForkchoiceUpdateValid metrics.Counter + ForkchoiceUpdateSyncing metrics.Counter + ForkchoiceUpdateInvalid metrics.Counter + ForkchoiceUpdateFatalError metrics.Counter + ForkchoiceUpdateNonFatalError metrics.Counter + ForkchoiceUpdateUndefinedError metrics.Counter + logger log.Logger } -// newEngineMetrics creates a new engineMetrics. -func newEngineMetrics( - sink TelemetrySink, - logger log.Logger, -) *engineMetrics { - return &engineMetrics{ - sink: sink, +// NewMetrics returns a new Metrics instance. +// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
+// +//nolint:funlen +func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { + return &Metrics{ + // New payload metrics + NewPayload: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload", + Help: "Number of new payload calls", + }, + []string{"payload_block_hash", "payload_parent_block_hash"}, + ), + NewPayloadValid: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload_valid", + Help: "Number of valid new payload responses", + }, + nil, + ), + NewPayloadAcceptedPayloadStatus: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload_accepted_payload_status", + Help: "Number of accepted payload status responses", + }, + nil, + ), + NewPayloadSyncingPayloadStatus: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload_syncing_payload_status", + Help: "Number of syncing payload status responses", + }, + nil, + ), + NewPayloadInvalidPayloadStatus: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload_invalid_payload_status", + Help: "Number of invalid payload status responses", + }, + nil, + ), + NewPayloadNonFatalError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload_non_fatal_error", + Help: "Number of non-fatal errors during new payload", + }, + []string{"error"}, + ), + NewPayloadFatalError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload_fatal_error", + Help: "Number of fatal errors during new payload", + }, + []string{"error"}, + ), + NewPayloadUndefinedError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "new_payload_undefined_error", + Help: "Number of undefined errors during new payload", + }, + []string{"error"}, + ), + + // Forkchoice update metrics + ForkchoiceUpdate: factory.NewCounter( + 
metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "forkchoice_update", + Help: "Number of forkchoice update calls", + }, + []string{"has_payload_attributes"}, + ), + ForkchoiceUpdateValid: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "forkchoice_update_valid", + Help: "Number of valid forkchoice update responses", + }, + nil, + ), + ForkchoiceUpdateSyncing: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "forkchoice_update_syncing", + Help: "Number of syncing forkchoice update responses", + }, + []string{"error"}, + ), + ForkchoiceUpdateInvalid: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "forkchoice_update_invalid", + Help: "Number of invalid forkchoice update responses", + }, + []string{"error"}, + ), + ForkchoiceUpdateFatalError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "forkchoice_update_fatal_error", + Help: "Number of fatal errors during forkchoice update", + }, + []string{"error"}, + ), + ForkchoiceUpdateNonFatalError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "forkchoice_update_non_fatal_error", + Help: "Number of non-fatal errors during forkchoice update", + }, + []string{"error"}, + ), + ForkchoiceUpdateUndefinedError: factory.NewCounter( + metrics.CounterOpts{ + Subsystem: "execution_engine", + Name: "forkchoice_update_undefined_error", + Help: "Number of undefined errors during forkchoice update", + }, + []string{"error"}, + ), + logger: logger, } } // markNewPayloadCalled increments the counter for new payload calls. 
-func (em *engineMetrics) markNewPayloadCalled( +func (m *Metrics) markNewPayloadCalled( payloadHash common.ExecutionHash, parentHash common.ExecutionHash, ) { - em.sink.IncrementCounter( - "beacon_kit.execution.engine.new_payload", + m.NewPayload.With( "payload_block_hash", payloadHash.Hex(), "payload_parent_block_hash", parentHash.Hex(), - ) + ).Add(1) } // markNewPayloadValid increments the counter for valid payloads. -func (em *engineMetrics) markNewPayloadValid( +func (m *Metrics) markNewPayloadValid( payloadHash common.ExecutionHash, parentHash common.ExecutionHash, ) { - em.logger.Debug( + m.logger.Debug( "Inserted new payload into execution chain", "payload_block_hash", payloadHash, "payload_parent_block_hash", parentHash, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.new_payload_valid", - ) + m.NewPayloadValid.Add(1) } // markNewPayloadAcceptedSyncingPayloadStatus increments // the counter for accepted syncing payload status. -func (em *engineMetrics) markNewPayloadAcceptedSyncingPayloadStatus( +func (m *Metrics) markNewPayloadAcceptedSyncingPayloadStatus( errStatus error, payloadHash common.ExecutionHash, parentHash common.ExecutionHash, @@ -89,105 +225,88 @@ func (em *engineMetrics) markNewPayloadAcceptedSyncingPayloadStatus( if errors.Is(errStatus, engineerrors.ErrSyncingPayloadStatus) { status = "syncing" } - em.logger.Warn( + m.logger.Warn( fmt.Sprintf("Received %s payload status during new payload. Awaiting execution client to finish sync.", status), "payload_block_hash", payloadHash, "parent_hash", parentHash, ) - em.sink.IncrementCounter( - fmt.Sprintf("beacon_kit.execution.engine.new_payload_%s_payload_status", status), - ) + if status == "accepted" { + m.NewPayloadAcceptedPayloadStatus.Add(1) + } else { + m.NewPayloadSyncingPayloadStatus.Add(1) + } } // markNewPayloadInvalidPayloadStatus increments the counter // for invalid payload status. 
-func (em *engineMetrics) markNewPayloadInvalidPayloadStatus( +func (m *Metrics) markNewPayloadInvalidPayloadStatus( payloadHash common.ExecutionHash, ) { - em.logger.Error( + m.logger.Error( "Received invalid payload status during new payload call", "payload_block_hash", payloadHash, "parent_hash", payloadHash, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.new_payload_invalid_payload_status", - ) + m.NewPayloadInvalidPayloadStatus.Add(1) } -// markNewPayloadFatalError increments the counter for JSON-RPC errors. -func (em *engineMetrics) markNewPayloadNonFatalError( +// markNewPayloadNonFatalError increments the counter for non-fatal errors. +func (m *Metrics) markNewPayloadNonFatalError( payloadHash common.ExecutionHash, lastValidHash common.ExecutionHash, err error, ) { - em.logger.Error( + m.logger.Error( "Received non-fatal error during new payload call", "payload_block_hash", payloadHash, "parent_hash", payloadHash, "last_valid_hash", lastValidHash, "error", err, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.new_payload_non_fatal_error", - "error", err.Error(), - ) + m.NewPayloadNonFatalError.With("error", err.Error()).Add(1) } -// markNewPayloadFatalError increments the counter for JSON-RPC errors. -func (em *engineMetrics) markNewPayloadFatalError( +// markNewPayloadFatalError increments the counter for fatal errors. +func (m *Metrics) markNewPayloadFatalError( payloadHash common.ExecutionHash, lastValidHash common.ExecutionHash, err error, ) { - em.logger.Error( + m.logger.Error( "Received fatal error during new payload call", "payload_block_hash", payloadHash, "parent_hash", payloadHash, "last_valid_hash", lastValidHash, "error", err, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.new_payload_fatal_error", - "error", err.Error(), - ) + m.NewPayloadFatalError.With("error", err.Error()).Add(1) } // markNewPayloadUndefinedError increments the counter for undefined errors. 
-func (em *engineMetrics) markNewPayloadUndefinedError( +func (m *Metrics) markNewPayloadUndefinedError( payloadHash common.ExecutionHash, err error, ) { - em.logger.Error( + m.logger.Error( "Received undefined error during new payload call", "payload_block_hash", payloadHash, "parent_hash", payloadHash, "error", err, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.new_payload_undefined_error", - "error", err.Error(), - ) + m.NewPayloadUndefinedError.With("error", err.Error()).Add(1) } // markNotifyForkchoiceUpdateCalled increments the counter for // notify forkchoice update calls. -func (em *engineMetrics) markNotifyForkchoiceUpdateCalled( +func (m *Metrics) markNotifyForkchoiceUpdateCalled( hasPayloadAttributes bool, ) { - em.sink.IncrementCounter( - "beacon_kit.execution.engine.forkchoice_update", - "has_payload_attributes", strconv.FormatBool(hasPayloadAttributes), - ) + m.ForkchoiceUpdate.With("has_payload_attributes", strconv.FormatBool(hasPayloadAttributes)).Add(1) } -// markForkchoiceUpdateValid increments the counter for valid forkchoice -// updates. -func (em *engineMetrics) markForkchoiceUpdateValid( +// markForkchoiceUpdateValid increments the counter for valid forkchoice updates. +func (m *Metrics) markForkchoiceUpdateValid( state *engineprimitives.ForkchoiceStateV1, hasPayloadAttributes bool, payloadID *engineprimitives.PayloadID, @@ -201,20 +320,16 @@ func (em *engineMetrics) markForkchoiceUpdateValid( if hasPayloadAttributes { args = append(args, "payload_id", payloadID) } - em.logger.Debug("Forkchoice updated", args...) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.forkchoice_update_valid", - ) + m.logger.Debug("Forkchoice updated", args...) + m.ForkchoiceUpdateValid.Add(1) } -// markForkchoiceUpdateSyncing increments -// the counter for accepted syncing forkchoice updates. -func (em *engineMetrics) markForkchoiceUpdateSyncing( +// markForkchoiceUpdateSyncing increments the counter for syncing forkchoice updates. 
+func (m *Metrics) markForkchoiceUpdateSyncing( state *engineprimitives.ForkchoiceStateV1, err error, ) { - em.logger.Warn( + m.logger.Warn( "Received syncing payload status during forkchoice update. Awaiting execution client to finish sync.", "head_block_hash", state.HeadBlockHash, @@ -223,74 +338,51 @@ func (em *engineMetrics) markForkchoiceUpdateSyncing( "finalized_block_hash", state.FinalizedBlockHash, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.forkchoice_update_syncing", - "error", - err.Error(), - ) + m.ForkchoiceUpdateSyncing.With("error", err.Error()).Add(1) } -// markForkchoiceUpdateInvalid increments the counter -// for invalid forkchoice updates. -func (em *engineMetrics) markForkchoiceUpdateInvalid( +// markForkchoiceUpdateInvalid increments the counter for invalid forkchoice updates. +func (m *Metrics) markForkchoiceUpdateInvalid( state *engineprimitives.ForkchoiceStateV1, err error, ) { - em.logger.Error( + m.logger.Error( "Received invalid payload status during forkchoice update call", "head_block_hash", state.HeadBlockHash, "safe_block_hash", state.SafeBlockHash, "finalized_block_hash", state.FinalizedBlockHash, "error", err, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.forkchoice_update_invalid", - "error", - err.Error(), - ) + m.ForkchoiceUpdateInvalid.With("error", err.Error()).Add(1) } -// markForkchoiceUpdateFatalError increments the counter for JSON-RPC errors +// markForkchoiceUpdateFatalError increments the counter for fatal errors // during forkchoice updates. 
-func (em *engineMetrics) markForkchoiceUpdateFatalError(err error) { - em.logger.Error( +func (m *Metrics) markForkchoiceUpdateFatalError(err error) { + m.logger.Error( "Received fatal error during forkchoice update call", "error", err, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.forkchoice_update_fatal_error", - "error", err.Error(), - ) + m.ForkchoiceUpdateFatalError.With("error", err.Error()).Add(1) } -// markForkchoiceUpdateNonFatalError increments the counter for JSON-RPC errors +// markForkchoiceUpdateNonFatalError increments the counter for non-fatal errors // during forkchoice updates. -func (em *engineMetrics) markForkchoiceUpdateNonFatalError(err error) { - em.logger.Error( +func (m *Metrics) markForkchoiceUpdateNonFatalError(err error) { + m.logger.Error( "Received non-fatal error during forkchoice update call", "error", err, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.forkchoice_update_non_fatal_error", - "error", err.Error(), - ) + m.ForkchoiceUpdateNonFatalError.With("error", err.Error()).Add(1) } // markForkchoiceUpdateUndefinedError increments the counter for undefined // errors during forkchoice updates. 
-func (em *engineMetrics) markForkchoiceUpdateUndefinedError(err error) { - em.logger.Error( +func (m *Metrics) markForkchoiceUpdateUndefinedError(err error) { + m.logger.Error( "Received undefined execution engine error during forkchoice update call", "error", err, ) - - em.sink.IncrementCounter( - "beacon_kit.execution.engine.forkchoice_update_undefined_error", - "error", err.Error(), - ) + m.ForkchoiceUpdateUndefinedError.With("error", err.Error()).Add(1) } diff --git a/go.mod b/go.mod index 3b32d16cb0..71d9b26d94 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,6 @@ require ( github.com/go-faster/xor v1.0.0 github.com/go-playground/validator/v10 v10.28.0 github.com/golang-jwt/jwt/v5 v5.3.0 - github.com/hashicorp/go-metrics v0.5.4 github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/holiman/uint256 v1.3.2 github.com/karalabe/ssz v0.2.1-0.20240724074312-3d1ff7a6f7c4 @@ -40,6 +39,8 @@ require ( github.com/ory/dockertest v3.3.5+incompatible github.com/phuslu/log v1.0.120 github.com/pkg/errors v0.9.1 + github.com/prometheus/client_golang v1.20.5 + github.com/prometheus/client_model v0.6.1 github.com/prysmaticlabs/gohashtree v0.0.4-beta.0.20240624100937-73632381301b github.com/prysmaticlabs/prysm/v5 v5.3.0 github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 @@ -164,6 +165,7 @@ require ( github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c // indirect github.com/hashicorp/go-hclog v1.6.3 // indirect github.com/hashicorp/go-immutable-radix v1.3.1 // indirect + github.com/hashicorp/go-metrics v0.5.4 // indirect github.com/hashicorp/go-plugin v1.6.2 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/hashicorp/yamux v0.1.2 // indirect @@ -209,8 +211,6 @@ require ( github.com/pierrec/lz4 v2.6.1+incompatible // indirect github.com/pk910/dynamic-ssz v0.0.4 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.20.5 // indirect - 
github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/protolambda/bls12-381-util v0.1.0 // indirect diff --git a/node-api/backend/backend.go b/node-api/backend/backend.go index 5f38c309e8..4cb600d70a 100644 --- a/node-api/backend/backend.go +++ b/node-api/backend/backend.go @@ -30,14 +30,13 @@ import ( corestore "cosmossdk.io/core/store" "cosmossdk.io/log" "cosmossdk.io/store" - sdkmetrics "cosmossdk.io/store/metrics" storetypes "cosmossdk.io/store/types" "github.com/berachain/beacon-kit/chain" ctypes "github.com/berachain/beacon-kit/consensus-types/types" cometbft "github.com/berachain/beacon-kit/consensus/cometbft/service" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/components/storage" "github.com/berachain/beacon-kit/node-core/types" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/state-transition/core/state" kvstorage "github.com/berachain/beacon-kit/storage" "github.com/berachain/beacon-kit/storage/beacondb" @@ -186,12 +185,8 @@ func (b *Backend) initGenesisState() error { if err != nil { return fmt.Errorf("failed opening mem db: %w", err) } - var ( - nopLog = log.NewNopLogger() - nopMetrics = sdkmetrics.NewNoOpMetrics() - ) - - b.cms = store.NewCommitMultiStore(b.db, nopLog, nopMetrics) + nopLog := log.NewNopLogger() + b.cms = store.NewCommitMultiStore(b.db, nopLog, kvstorage.NoOpStoreMetrics{}) b.cms.MountStoreWithDB(backendStoreKey, storetypes.StoreTypeIAVL, nil) if err = b.cms.LoadLatestVersion(); err != nil { @@ -209,7 +204,7 @@ func (b *Backend) initGenesisState() error { kvStore.WithContext(sdkCtx), b.cs, sdkCtx.Logger(), - metrics.NewNoOpTelemetrySink(), + state.NewMetrics(discard.NewFactory()), ) return nil } diff --git a/node-api/backend/backend_test.go b/node-api/backend/backend_test.go index 1208b59e0e..fb310b1451 100644 --- 
a/node-api/backend/backend_test.go +++ b/node-api/backend/backend_test.go @@ -32,9 +32,9 @@ import ( cometbft "github.com/berachain/beacon-kit/consensus/cometbft/service" "github.com/berachain/beacon-kit/node-api/backend" "github.com/berachain/beacon-kit/node-api/backend/mocks" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/components/storage" coremocks "github.com/berachain/beacon-kit/node-core/types/mocks" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/math" "github.com/berachain/beacon-kit/state-transition/core/state" @@ -158,7 +158,7 @@ func TestBackendLoadData(t *testing.T) { _, kvStore, depositStore, err := statetransition.BuildTestStores() require.NoError(t, err) sb := storage.NewBackend( - cs, nil, kvStore, depositStore, nil, log.NewNopLogger(), metrics.NewNoOpTelemetrySink(), + cs, nil, kvStore, depositStore, nil, log.NewNopLogger(), state.NewMetrics(discard.NewFactory()), ) tcs := coremocks.NewConsensusService(t) diff --git a/node-api/handlers/beacon/genesis_test.go b/node-api/handlers/beacon/genesis_test.go index 03a675d221..798bb6448c 100644 --- a/node-api/handlers/beacon/genesis_test.go +++ b/node-api/handlers/beacon/genesis_test.go @@ -27,7 +27,6 @@ import ( corestore "cosmossdk.io/core/store" "cosmossdk.io/log" "cosmossdk.io/store" - sdkmetrics "cosmossdk.io/store/metrics" storetypes "cosmossdk.io/store/types" "github.com/berachain/beacon-kit/chain" "github.com/berachain/beacon-kit/config/spec" @@ -40,7 +39,7 @@ import ( beacontypes "github.com/berachain/beacon-kit/node-api/handlers/beacon/types" "github.com/berachain/beacon-kit/node-api/handlers/types" "github.com/berachain/beacon-kit/node-api/middleware" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" 
"github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/math" "github.com/berachain/beacon-kit/state-transition/core/state" @@ -157,12 +156,8 @@ func initTestGenesisState(t *testing.T, cs chain.Spec) *state.StateDB { db, err := db.OpenDB("", dbm.MemDBBackend) require.NoError(t, err) - var ( - nopLog = log.NewNopLogger() - nopMetrics = sdkmetrics.NewNoOpMetrics() - ) - - cms := store.NewCommitMultiStore(db, nopLog, nopMetrics) + nopLog := log.NewNopLogger() + cms := store.NewCommitMultiStore(db, nopLog, kvstorage.NoOpStoreMetrics{}) cms.MountStoreWithDB(testStoreKey, storetypes.StoreTypeIAVL, nil) require.NoError(t, cms.LoadLatestVersion()) @@ -177,6 +172,6 @@ func initTestGenesisState(t *testing.T, cs chain.Spec) *state.StateDB { kvStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), - metrics.NewNoOpTelemetrySink(), + state.NewMetrics(discard.NewFactory()), ) } diff --git a/node-api/handlers/beacon/validators_filters_test.go b/node-api/handlers/beacon/validators_filters_test.go index ded3d52dff..c8cf81a60d 100644 --- a/node-api/handlers/beacon/validators_filters_test.go +++ b/node-api/handlers/beacon/validators_filters_test.go @@ -41,7 +41,7 @@ import ( handlertypes "github.com/berachain/beacon-kit/node-api/handlers/types" "github.com/berachain/beacon-kit/node-api/handlers/utils" "github.com/berachain/beacon-kit/node-api/middleware" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/constants" "github.com/berachain/beacon-kit/primitives/math" statedb "github.com/berachain/beacon-kit/state-transition/core/state" @@ -550,7 +550,7 @@ func makeTestState(t *testing.T, cs chain.Spec) *statedb.StateDB { require.NoError(t, errSt) sdkCtx := sdk.NewContext(cms.CacheMultiStore(), true, cosmoslog.NewNopLogger()) st := statedb.NewBeaconStateFromDB( - kvStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), 
metrics.NewNoOpTelemetrySink(), + kvStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), statedb.NewMetrics(discard.NewFactory()), ) return st } diff --git a/node-core/components/backend.go b/node-core/components/backend.go index 135055b8f1..e3b2f4a073 100644 --- a/node-core/components/backend.go +++ b/node-core/components/backend.go @@ -26,8 +26,8 @@ import ( "github.com/berachain/beacon-kit/consensus-types/types" dastore "github.com/berachain/beacon-kit/da/store" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/components/storage" + statedb "github.com/berachain/beacon-kit/state-transition/core/state" "github.com/berachain/beacon-kit/storage/beacondb" "github.com/berachain/beacon-kit/storage/block" "github.com/berachain/beacon-kit/storage/deposit" @@ -42,7 +42,7 @@ type StorageBackendInput struct { DepositStore deposit.StoreManager BeaconStore *beacondb.KVStore Logger *phuslu.Logger - TelemetrySink *metrics.TelemetrySink + StateDBMetrics *statedb.Metrics } // ProvideStorageBackend is the depinject provider that returns a beacon storage @@ -57,6 +57,6 @@ func ProvideStorageBackend( in.DepositStore, in.BlockStore, in.Logger.With("service", "storage-backend"), - in.TelemetrySink, + in.StateDBMetrics, ) } diff --git a/node-core/components/blob_fetcher.go b/node-core/components/blob_fetcher.go index b23e2b55dd..009c0a1132 100644 --- a/node-core/components/blob_fetcher.go +++ b/node-core/components/blob_fetcher.go @@ -29,7 +29,6 @@ import ( "github.com/berachain/beacon-kit/config" "github.com/berachain/beacon-kit/da/blobreactor" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/components/storage" "github.com/cosmos/cosmos-sdk/client/flags" "github.com/spf13/cast" @@ -39,13 +38,13 @@ import ( type BlobFetcherInput struct { depinject.In - BlobProcessor BlobProcessor - 
BlobReactor *blobreactor.BlobReactor - ChainSpec chain.Spec - Logger *phuslu.Logger - StorageBackend *storage.Backend - TelemetrySink *metrics.TelemetrySink - AppOpts config.AppOptions + BlobProcessor BlobProcessor + BlobReactor *blobreactor.BlobReactor + ChainSpec chain.Spec + Logger *phuslu.Logger + StorageBackend *storage.Backend + BlobFetcherMetrics *blockchain.BlobFetcherMetrics + AppOpts config.AppOptions } // ProvideBlobFetcher provides the blob fetcher for asynchronous blob retrieval. @@ -58,6 +57,6 @@ func ProvideBlobFetcher(in BlobFetcherInput) (blockchain.BlobFetcher, error) { in.StorageBackend, in.ChainSpec, blockchain.DefaultBlobFetcherConfig(), - in.TelemetrySink, + in.BlobFetcherMetrics, ) } diff --git a/node-core/components/blob_reactor.go b/node-core/components/blob_reactor.go index 9e69c745b9..80fd662906 100644 --- a/node-core/components/blob_reactor.go +++ b/node-core/components/blob_reactor.go @@ -25,7 +25,6 @@ import ( "github.com/berachain/beacon-kit/config" "github.com/berachain/beacon-kit/da/blobreactor" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/components/storage" ) @@ -33,10 +32,10 @@ import ( type BlobReactorInput struct { depinject.In - Config *config.Config - Logger *phuslu.Logger - StorageBackend *storage.Backend - TelemetrySink *metrics.TelemetrySink + Config *config.Config + Logger *phuslu.Logger + StorageBackend *storage.Backend + BlobReactorMetrics *blobreactor.Metrics } // ProvideBlobReactor provides the blob reactor for P2P communication. 
@@ -52,6 +51,6 @@ func ProvideBlobReactor(in BlobReactorInput) *blobreactor.BlobReactor { in.StorageBackend.AvailabilityStore().IndexDB, in.Logger.With("service", "blob-reactor"), cfg, - in.TelemetrySink, + in.BlobReactorMetrics, ) } diff --git a/node-core/components/blobs.go b/node-core/components/blobs.go index 9198b7e0dd..53dd436aab 100644 --- a/node-core/components/blobs.go +++ b/node-core/components/blobs.go @@ -27,7 +27,6 @@ import ( dablob "github.com/berachain/beacon-kit/da/blob" "github.com/berachain/beacon-kit/da/kzg" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" gokzg4844 "github.com/crate-crypto/go-kzg-4844" "github.com/spf13/cast" ) @@ -58,7 +57,8 @@ type BlobProcessorIn struct { BlobProofVerifier kzg.BlobProofVerifier Logger *phuslu.Logger - TelemetrySink *metrics.TelemetrySink + ProcessorMetrics *dablob.ProcessorMetrics + VerifierMetrics *dablob.VerifierMetrics } // ProvideBlobProcessor is a function that provides the BlobProcessor to the @@ -67,6 +67,7 @@ func ProvideBlobProcessor(in BlobProcessorIn) *dablob.Processor { return dablob.NewProcessor( in.Logger.With("service", "blob-processor"), in.BlobProofVerifier, - in.TelemetrySink, + in.ProcessorMetrics, + in.VerifierMetrics, ) } diff --git a/node-core/components/chain_service.go b/node-core/components/chain_service.go index 63beb4888a..addf7d1537 100644 --- a/node-core/components/chain_service.go +++ b/node-core/components/chain_service.go @@ -27,7 +27,6 @@ import ( "github.com/berachain/beacon-kit/execution/deposit" "github.com/berachain/beacon-kit/execution/engine" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/components/storage" ) @@ -43,7 +42,7 @@ type ChainServiceInput struct { StorageBackend *storage.Backend BlobProcessor BlobProcessor BlobFetcher blockchain.BlobFetcher - TelemetrySink *metrics.TelemetrySink + 
BlockchainMetrics *blockchain.Metrics BeaconDepositContract deposit.Contract } @@ -59,6 +58,6 @@ func ProvideChainService(in ChainServiceInput) *blockchain.Service { in.ExecutionEngine, in.LocalBuilder, in.StateProcessor, - in.TelemetrySink, + in.BlockchainMetrics, ) } diff --git a/node-core/components/telemetry_service.go b/node-core/components/cometbft_metrics.go similarity index 70% rename from node-core/components/telemetry_service.go rename to node-core/components/cometbft_metrics.go index 79112d0a42..b7dda15452 100644 --- a/node-core/components/telemetry_service.go +++ b/node-core/components/cometbft_metrics.go @@ -13,7 +13,7 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. @@ -21,13 +21,11 @@ package components import ( - "github.com/berachain/beacon-kit/config/config" - "github.com/berachain/beacon-kit/observability/telemetry" + cometbft "github.com/berachain/beacon-kit/consensus/cometbft/service" + "github.com/berachain/beacon-kit/observability/metrics" ) -// ProvideTelemetryService is a function that provides a TelemetrySink. -func ProvideTelemetryService( - cfg *config.Config, -) (*telemetry.Service, error) { - return telemetry.NewService(&cfg.Telemetry) +// ProvideCometBFTMetrics provides metrics for the CometBFT service. 
+func ProvideCometBFTMetrics(factory metrics.Factory) *cometbft.Metrics { + return cometbft.NewMetrics(factory) } diff --git a/node-core/components/cometbft_service.go b/node-core/components/cometbft_service.go index 017f85620b..deb565f721 100644 --- a/node-core/components/cometbft_service.go +++ b/node-core/components/cometbft_service.go @@ -29,7 +29,6 @@ import ( "github.com/berachain/beacon-kit/da/blobreactor" "github.com/berachain/beacon-kit/log/phuslu" "github.com/berachain/beacon-kit/node-core/builder" - "github.com/berachain/beacon-kit/node-core/components/metrics" cmtcfg "github.com/cometbft/cometbft/config" dbm "github.com/cosmos/cosmos-db" ) @@ -44,7 +43,7 @@ func ProvideCometBFTService( cs chain.Spec, cmtCfg *cmtcfg.Config, appOpts config.AppOptions, - telemetrySink *metrics.TelemetrySink, + metrics *cometbft.Metrics, ) *cometbft.Service { return cometbft.NewService( logger, @@ -54,7 +53,7 @@ func ProvideCometBFTService( blobReactor, cs, cmtCfg, - telemetrySink, + metrics, builder.DefaultServiceOptions(appOpts)..., ) } diff --git a/node-core/components/engine.go b/node-core/components/engine.go index 35bbf3c032..c13de283ca 100644 --- a/node-core/components/engine.go +++ b/node-core/components/engine.go @@ -29,7 +29,6 @@ import ( "github.com/berachain/beacon-kit/execution/client" "github.com/berachain/beacon-kit/execution/engine" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/primitives/net/jwt" ) @@ -39,9 +38,9 @@ type EngineClientInputs struct { ChainSpec chain.Spec Config *config.Config // TODO: this feels like a hood way to handle it. - JWTSecret *jwt.Secret `optional:"true"` - Logger *phuslu.Logger - TelemetrySink *metrics.TelemetrySink + JWTSecret *jwt.Secret `optional:"true"` + Logger *phuslu.Logger + Metrics *client.Metrics } // ProvideEngineClient creates a new EngineClient. 
@@ -50,17 +49,17 @@ func ProvideEngineClient(in EngineClientInputs) *client.EngineClient { in.Config.GetEngine(), in.Logger.With("service", "engine.client"), in.JWTSecret, - in.TelemetrySink, + in.Metrics, new(big.Int).SetUint64(in.ChainSpec.DepositEth1ChainID()), ) } -// EngineClientInputs is the input for the EngineClient. +// ExecutionEngineInputs is the input for the ExecutionEngine. type ExecutionEngineInputs struct { depinject.In - EngineClient *client.EngineClient - Logger *phuslu.Logger - TelemetrySink *metrics.TelemetrySink + EngineClient *client.EngineClient + Logger *phuslu.Logger + Metrics *engine.Metrics } // ProvideExecutionEngine provides the execution engine to the depinject @@ -69,6 +68,6 @@ func ProvideExecutionEngine(in ExecutionEngineInputs) *engine.Engine { return engine.New( in.EngineClient, in.Logger.With("service", "execution-engine"), - in.TelemetrySink, + in.Metrics, ) } diff --git a/node-core/components/interfaces.go b/node-core/components/interfaces.go index 3ab1abdbd2..007db6efd3 100644 --- a/node-core/components/interfaces.go +++ b/node-core/components/interfaces.go @@ -164,13 +164,6 @@ type ( StateFromContext(context.Context) *statedb.StateDB } - // // TelemetrySink is an interface for sending metrics to a telemetry - // backend. - // TelemetrySink interface { - // // MeasureSince measures the time since the given time. - // MeasureSince(key string, start time.Time, args ...string) - // } - // // Validator represents an interface for a validator with generic type // // ValidatorT. // Validator[ diff --git a/node-core/components/metrics/sink.go b/node-core/components/metrics/sink.go deleted file mode 100644 index 0b0d525acc..0000000000 --- a/node-core/components/metrics/sink.go +++ /dev/null @@ -1,97 +0,0 @@ -// SPDX-License-Identifier: BUSL-1.1 -// -// Copyright (C) 2025, Berachain Foundation. All rights reserved. 
-// Use of this software is governed by the Business Source License included -// in the LICENSE file of this repository and at www.mariadb.com/bsl11. -// -// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY -// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER -// VERSIONS OF THE LICENSED WORK. -// -// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF -// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF -// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). -// -// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, -// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND -// TITLE. - -package metrics - -import ( - "time" - - "github.com/cosmos/cosmos-sdk/telemetry" - "github.com/hashicorp/go-metrics" -) - -type TelemetrySink struct{} - -// NewTelemetrySink creates a new TelemetrySink. -func NewTelemetrySink() TelemetrySink { - return TelemetrySink{} -} - -// IncrementCounter increments a counter metric identified by the provided -// keys. -func (TelemetrySink) IncrementCounter(key string, args ...string) { - telemetry.IncrCounterWithLabels([]string{key}, 1, argsToLabels(args...)) -} - -// SetGauge sets a gauge metric to the specified value, identified by the -// provided keys. -func (TelemetrySink) SetGauge(key string, value int64, args ...string) { - telemetry.SetGaugeWithLabels( - []string{key}, - float32(value), - argsToLabels(args...), - ) -} - -// MeasureSince measures the time since the provided start time and records -// the duration in a metric identified by the provided key. 
-func (TelemetrySink) MeasureSince(key string, start time.Time, args ...string) { - if !telemetry.IsTelemetryEnabled() { - return - } - - // TODO: Make PR to SDK, currently this will not have any globalLabels. - metrics.MeasureSinceWithLabels( - []string{key}, - start.UTC(), - argsToLabels(args...), - ) -} - -// argsToLabels converts a list of key-value pairs to a list of metrics labels. -// -//nolint:mnd // its okay. -func argsToLabels(args ...string) []metrics.Label { - labels := make([]metrics.Label, len(args)/2) - for i := 0; i < len(args); i += 2 { - labels[i/2] = metrics.Label{ - Name: args[i], - Value: args[i+1], - } - } - return labels -} - -// NoOpTelemetrySink is a no-op implementation of the TelemetrySink interface. -type NoOpTelemetrySink struct{} - -// NewNoOpTelemetrySink creates a new NoOpTelemetrySink. -func NewNoOpTelemetrySink() NoOpTelemetrySink { - return NoOpTelemetrySink{} -} - -// IncrementCounter is a no-op implementation of the TelemetrySink interface. -func (NoOpTelemetrySink) IncrementCounter(string, ...string) {} - -// SetGauge is a no-op implementation of the TelemetrySink interface. -func (NoOpTelemetrySink) SetGauge(string, int64, ...string) {} - -// MeasureSince is a no-op implementation of the TelemetrySink interface. -func (NoOpTelemetrySink) MeasureSince(string, time.Time, ...string) {} diff --git a/observability/telemetry/service.go b/node-core/components/metrics_factory.go similarity index 50% rename from observability/telemetry/service.go rename to node-core/components/metrics_factory.go index b7cf9e3d63..c33fc2be2a 100644 --- a/observability/telemetry/service.go +++ b/node-core/components/metrics_factory.go @@ -13,47 +13,33 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. 
LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. -package telemetry +package components import ( - "context" - - "github.com/cosmos/cosmos-sdk/telemetry" + "cosmossdk.io/depinject" + "github.com/berachain/beacon-kit/config/config" + "github.com/berachain/beacon-kit/observability/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" + "github.com/berachain/beacon-kit/observability/metrics/prometheus" ) -type Config = telemetry.Config - -// Service is a telemetry service. -type Service struct { - m *telemetry.Metrics +type MetricsFactoryInput struct { + depinject.In + Config *config.Config } -// NewService creates a new telemetry service. -func NewService(cfg *telemetry.Config) (*Service, error) { - m, err := telemetry.New(*cfg) - if err != nil { - return nil, err +// ProvideMetricsFactory provides a metrics factory based on configuration. +// When Config.Telemetry.Enabled is true, creates real Prometheus metrics. +// When false, creates no-op metrics with zero runtime overhead. +// This setting affects ALL metrics in the beacon-kit system. +func ProvideMetricsFactory(in MetricsFactoryInput) metrics.Factory { + if in.Config.Telemetry.Enabled { + return prometheus.NewFactory("beacon_kit") } - return &Service{ - m: m, - }, nil -} - -// Name returns the service name. -func (s *Service) Name() string { - return "telemetry" -} - -// Start starts the telemetry service. 
-func (s *Service) Start(context.Context) error { - return nil -} - -func (s *Service) Stop() error { - return nil + return discard.NewFactory() } diff --git a/node-core/components/metrics_providers.go b/node-core/components/metrics_providers.go new file mode 100644 index 0000000000..22f25793fb --- /dev/null +++ b/node-core/components/metrics_providers.go @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: BUSL-1.1 +// +// Copyright (C) 2025, Berachain Foundation. All rights reserved. +// Use of this software is governed by the Business Source License included +// in the LICENSE file of this repository and at www.mariadb.com/bsl11. +// +// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY +// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER +// VERSIONS OF THE LICENSED WORK. +// +// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF +// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF +// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). +// +// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +// TITLE. 
+ +package components + +import ( + "cosmossdk.io/depinject" + "github.com/berachain/beacon-kit/beacon/blockchain" + "github.com/berachain/beacon-kit/beacon/validator" + dablob "github.com/berachain/beacon-kit/da/blob" + "github.com/berachain/beacon-kit/da/blobreactor" + "github.com/berachain/beacon-kit/execution/client" + "github.com/berachain/beacon-kit/execution/engine" + "github.com/berachain/beacon-kit/log/phuslu" + "github.com/berachain/beacon-kit/observability/metrics" + "github.com/berachain/beacon-kit/state-transition/core" + "github.com/berachain/beacon-kit/state-transition/core/state" +) + +// Simple metrics providers (no additional dependencies) + +func ProvideBlobFactoryMetrics(factory metrics.Factory) *dablob.FactoryMetrics { + return dablob.NewFactoryMetrics(factory) +} + +func ProvideBlobProcessorMetrics(factory metrics.Factory) *dablob.ProcessorMetrics { + return dablob.NewProcessorMetrics(factory) +} + +func ProvideBlobVerifierMetrics(factory metrics.Factory) *dablob.VerifierMetrics { + return dablob.NewVerifierMetrics(factory) +} + +func ProvideBlobFetcherMetrics(factory metrics.Factory) *blockchain.BlobFetcherMetrics { + return blockchain.NewBlobFetcherMetrics(factory) +} + +func ProvideBlobReactorMetrics(factory metrics.Factory) *blobreactor.Metrics { + return blobreactor.NewMetrics(factory) +} + +func ProvideBlockchainMetrics(factory metrics.Factory) *blockchain.Metrics { + return blockchain.NewMetrics(factory) +} + +func ProvideValidatorMetrics(factory metrics.Factory) *validator.Metrics { + return validator.NewMetrics(factory) +} + +func ProvideStateDBMetrics(factory metrics.Factory) *state.Metrics { + return state.NewMetrics(factory) +} + +func ProvideStateProcessorMetrics(factory metrics.Factory) *core.Metrics { + return core.NewMetrics(factory) +} + +type ExecutionClientMetricsInput struct { + depinject.In + Factory metrics.Factory + Logger *phuslu.Logger +} + +func ProvideExecutionClientMetrics(in ExecutionClientMetricsInput) 
*client.Metrics { + return client.NewMetrics(in.Factory, in.Logger.With("service", "execution-client")) +} + +type ExecutionEngineMetricsInput struct { + depinject.In + Factory metrics.Factory + Logger *phuslu.Logger +} + +func ProvideExecutionEngineMetrics(in ExecutionEngineMetricsInput) *engine.Metrics { + return engine.NewMetrics(in.Factory, in.Logger.With("service", "execution-engine")) +} + +// AllMetricsProviders returns all metrics provider functions for depinject. +// This helper groups all metrics providers together to reduce boilerplate +// in component lists (defaults.go, components.go, etc.). +func AllMetricsProviders() []any { + return []any{ + ProvideMetricsFactory, // Must be first - creates factory used by all others + ProvideStateDBMetrics, + ProvideValidatorMetrics, + ProvideExecutionClientMetrics, + ProvideExecutionEngineMetrics, + ProvideBlobFactoryMetrics, + ProvideBlobProcessorMetrics, + ProvideBlobVerifierMetrics, + ProvideBlockchainMetrics, + ProvideBlobFetcherMetrics, + ProvideBlobReactorMetrics, + ProvideStateProcessorMetrics, + ProvideCometBFTMetrics, + } +} diff --git a/node-core/components/reporting_service.go b/node-core/components/reporting_service.go index 41dc31a1a9..f320361b2a 100644 --- a/node-core/components/reporting_service.go +++ b/node-core/components/reporting_service.go @@ -25,23 +25,20 @@ import ( "github.com/berachain/beacon-kit/chain" "github.com/berachain/beacon-kit/execution/client" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/services/version" sdkversion "github.com/cosmos/cosmos-sdk/version" ) type ReportingServiceInput struct { depinject.In - Logger *phuslu.Logger - TelemetrySink *metrics.TelemetrySink - EngineClient *client.EngineClient - ChainSpec chain.Spec + Logger *phuslu.Logger + EngineClient *client.EngineClient + ChainSpec chain.Spec } func ProvideReportingService(in ReportingServiceInput) 
*version.ReportingService { return version.NewReportingService( in.Logger.With("service", "reporting"), - in.TelemetrySink, sdkversion.Version, in.EngineClient, in.ChainSpec, diff --git a/node-core/components/service_registry.go b/node-core/components/service_registry.go index bffd33d931..5e01ed5f74 100644 --- a/node-core/components/service_registry.go +++ b/node-core/components/service_registry.go @@ -27,12 +27,10 @@ import ( "github.com/berachain/beacon-kit/execution/client" "github.com/berachain/beacon-kit/log/phuslu" "github.com/berachain/beacon-kit/node-api/server" - "github.com/berachain/beacon-kit/node-core/components/metrics" service "github.com/berachain/beacon-kit/node-core/services/registry" "github.com/berachain/beacon-kit/node-core/services/shutdown" "github.com/berachain/beacon-kit/node-core/services/version" "github.com/berachain/beacon-kit/node-core/types" - "github.com/berachain/beacon-kit/observability/telemetry" ) // ServiceRegistryInput is the input for the service registry provider. 
@@ -43,8 +41,6 @@ type ServiceRegistryInput struct { Logger *phuslu.Logger NodeAPIServer *server.Server ReportingService *version.ReportingService - TelemetrySink *metrics.TelemetrySink - TelemetryService *telemetry.Service ValidatorService *validator.Service CometBFTService types.ConsensusService ShutdownService *shutdown.Service @@ -62,7 +58,6 @@ func ProvideServiceRegistry(in ServiceRegistryInput) *service.Registry { service.WithService(in.ValidatorService), service.WithService(in.NodeAPIServer), service.WithService(in.ReportingService), - service.WithService(in.TelemetryService), // engineClient will block until it connects to the execution layer service.WithService(in.EngineClient), diff --git a/node-core/components/sidecars.go b/node-core/components/sidecars.go index 58745664bf..b48e1f0cf8 100644 --- a/node-core/components/sidecars.go +++ b/node-core/components/sidecars.go @@ -23,17 +23,16 @@ package components import ( "cosmossdk.io/depinject" dablob "github.com/berachain/beacon-kit/da/blob" - "github.com/berachain/beacon-kit/node-core/components/metrics" ) type SidecarFactoryInput struct { depinject.In - TelemetrySink *metrics.TelemetrySink + Metrics *dablob.FactoryMetrics } func ProvideSidecarFactory(in SidecarFactoryInput) *dablob.SidecarFactory { return dablob.NewSidecarFactory( - in.TelemetrySink, + in.Metrics, ) } diff --git a/node-core/components/state_processor.go b/node-core/components/state_processor.go index 50c9a65a91..9955233b58 100644 --- a/node-core/components/state_processor.go +++ b/node-core/components/state_processor.go @@ -25,7 +25,6 @@ import ( "github.com/berachain/beacon-kit/chain" "github.com/berachain/beacon-kit/execution/engine" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/primitives/crypto" "github.com/berachain/beacon-kit/state-transition/core" "github.com/berachain/beacon-kit/storage/deposit" @@ -35,12 +34,12 @@ import ( // 
framework. type StateProcessorInput struct { depinject.In - Logger *phuslu.Logger - ChainSpec chain.Spec - ExecutionEngine *engine.Engine - DepositStore deposit.StoreManager - Signer crypto.BLSSigner - TelemetrySink *metrics.TelemetrySink + Logger *phuslu.Logger + ChainSpec chain.Spec + ExecutionEngine *engine.Engine + DepositStore deposit.StoreManager + Signer crypto.BLSSigner + StateProcessorMetrics *core.Metrics } // ProvideStateProcessor provides the state processor to the depinject @@ -53,6 +52,6 @@ func ProvideStateProcessor(in StateProcessorInput) *core.StateProcessor { in.DepositStore, in.Signer, crypto.GetAddressFromPubKey, - in.TelemetrySink, + in.StateProcessorMetrics, ) } diff --git a/node-core/components/storage/storage.go b/node-core/components/storage/storage.go index 4926c84ff1..2b7b249b64 100644 --- a/node-core/components/storage/storage.go +++ b/node-core/components/storage/storage.go @@ -42,7 +42,7 @@ type Backend struct { depositStore deposit.StoreManager blockStore *block.KVStore[*types.BeaconBlock] logger log.Logger - telemetrySink statedb.TelemetrySink + stateDBMetrics *statedb.Metrics } func NewBackend( @@ -52,7 +52,7 @@ func NewBackend( depositStore deposit.StoreManager, blockStore *block.KVStore[*types.BeaconBlock], logger log.Logger, - telemetrySink statedb.TelemetrySink, + stateDBMetrics *statedb.Metrics, ) *Backend { return &Backend{ chainSpec: chainSpec, @@ -61,7 +61,7 @@ func NewBackend( depositStore: depositStore, blockStore: blockStore, logger: logger, - telemetrySink: telemetrySink, + stateDBMetrics: stateDBMetrics, } } @@ -78,7 +78,7 @@ func (k Backend) StateFromContext(ctx context.Context) *statedb.StateDB { k.kvStore.WithContext(ctx), k.chainSpec, k.logger, - k.telemetrySink, + k.stateDBMetrics, ) } diff --git a/node-core/components/validator_service.go b/node-core/components/validator_service.go index 253cca1638..c15e7646cc 100644 --- a/node-core/components/validator_service.go +++ b/node-core/components/validator_service.go @@ 
-26,7 +26,6 @@ import ( "github.com/berachain/beacon-kit/chain" "github.com/berachain/beacon-kit/config" "github.com/berachain/beacon-kit/log/phuslu" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/components/storage" "github.com/berachain/beacon-kit/primitives/crypto" ) @@ -42,7 +41,7 @@ type ValidatorServiceInput struct { StorageBackend *storage.Backend Signer crypto.BLSSigner SidecarFactory SidecarFactory - TelemetrySink *metrics.TelemetrySink + Metrics *validator.Metrics } // ProvideValidatorService is a depinject provider for the validator service. @@ -57,6 +56,6 @@ func ProvideValidatorService(in ValidatorServiceInput) (*validator.Service, erro in.Signer, in.SidecarFactory, in.LocalBuilder, - in.TelemetrySink, + in.Metrics, ), nil } diff --git a/node-core/services/version/interfaces.go b/node-core/services/version/interfaces.go index 61babe8cee..de4fc45a33 100644 --- a/node-core/services/version/interfaces.go +++ b/node-core/services/version/interfaces.go @@ -22,16 +22,6 @@ package version import "github.com/berachain/beacon-kit/chain" -// TelemetrySink is an interface for sending telemetry data. -type TelemetrySink interface { - // IncrementCounter increments a counter metric identified by the provided - // keys. - IncrementCounter(key string, args ...string) - // SetGauge sets a gauge metric to the specified value, identified by the - // provided keys. - SetGauge(key string, value int64, args ...string) -} - type ForkSpec interface { chain.ForkSpec } diff --git a/node-core/services/version/version.go b/node-core/services/version/version.go index 71fbb50f82..c3b8477175 100644 --- a/node-core/services/version/version.go +++ b/node-core/services/version/version.go @@ -46,8 +46,6 @@ type ReportingService struct { version string // reportingInterval is the interval at which the version is reported. reportingInterval time.Duration - // sink is the telemetry sink used to report metrics. 
- sink TelemetrySink // client to query the execution layer client *client.EngineClient forkSpec ForkSpec @@ -56,7 +54,6 @@ type ReportingService struct { // NewReportingService creates a new VersionReporterService. func NewReportingService( logger log.Logger, - telemetrySink TelemetrySink, version string, engineClient *client.EngineClient, forkSpec ForkSpec, @@ -65,7 +62,6 @@ func NewReportingService( logger: logger, version: version, reportingInterval: defaultReportingInterval, - sink: telemetrySink, client: engineClient, forkSpec: forkSpec, } @@ -202,25 +198,8 @@ func (rs *ReportingService) logTelemetry( ethVersion engineprimitives.ClientVersionV1) { systemInfo := runtime.GOOS + "/" + runtime.GOARCH - // TODO: Delete this counter as it should be included in the new - // beacon_kit.runtime.version metric. - rs.sink.IncrementCounter( - "beacon_kit.runtime.version.reported", - "version", rs.version, "system", systemInfo, - ) - rs.logger.Info("Reporting version", "version", rs.version, "system", systemInfo, "eth_version", ethVersion.Version, "eth_name", ethVersion.Name) - - // Report the version to the telemetry sink and include labels - // for beacon node version and eth name and version - var args = [8]string{ - "version", rs.version, - "system", systemInfo, - "eth_version", ethVersion.Version, - "eth_name", ethVersion.Name, - } - rs.sink.SetGauge("beacon_kit.runtime.version", 1, args[:]...) } diff --git a/observability/metrics/discard/discard.go b/observability/metrics/discard/discard.go new file mode 100644 index 0000000000..1ee15b89ab --- /dev/null +++ b/observability/metrics/discard/discard.go @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: BUSL-1.1 +// +// Copyright (C) 2025, Berachain Foundation. All rights reserved. +// Use of this software is governed by the Business Source License included +// in the LICENSE file of this repository and at www.mariadb.com/bsl11. 
+// +// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY +// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER +// VERSIONS OF THE LICENSED WORK. +// +// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF +// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF +// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). +// +// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +// TITLE. + +package discard + +import "github.com/berachain/beacon-kit/observability/metrics" + +// Factory creates no-op metrics with zero runtime overhead. +type Factory struct{} + +// NewFactory creates a new no-op metrics factory. +// All metrics created by this factory have zero runtime overhead. +// +// Example: +// +// factory := discard.NewFactory() +// counter := factory.NewCounter(metrics.CounterOpts{ +// Subsystem: "blockchain", +// Name: "blocks_total", +// Help: "Total number of blocks processed", +// }, []string{"status"}) +// counter.Add(1) // This is a no-op, compiled away by the Go compiler +func NewFactory() metrics.Factory { + return Factory{} +} + +// NewCounter returns a no-op Counter. +func (Factory) NewCounter(metrics.CounterOpts, []string) metrics.Counter { + return NewCounter() +} + +// NewGauge returns a no-op Gauge. +func (Factory) NewGauge(metrics.GaugeOpts, []string) metrics.Gauge { + return NewGauge() +} + +// NewHistogram returns a no-op Histogram. +func (Factory) NewHistogram(metrics.HistogramOpts, []string) metrics.Histogram { + return NewHistogram() +} + +// noOpCounter is a no-op implementation of metrics.Counter. 
+// All operations are compiled away by the Go compiler, providing +// zero runtime overhead when telemetry is disabled. +type noOpCounter struct{} + +// NewCounter returns a no-op Counter. +func NewCounter() metrics.Counter { + return noOpCounter{} +} + +// With returns the same no-op Counter. +func (noOpCounter) With(...string) metrics.Counter { + return noOpCounter{} +} + +// Add does nothing. +func (noOpCounter) Add(float64) {} + +// noOpGauge is a no-op implementation of metrics.Gauge. +// All operations are compiled away by the Go compiler, providing +// zero runtime overhead when telemetry is disabled. +type noOpGauge struct{} + +// NewGauge returns a no-op Gauge. +func NewGauge() metrics.Gauge { + return noOpGauge{} +} + +// With returns the same no-op Gauge. +func (noOpGauge) With(...string) metrics.Gauge { + return noOpGauge{} +} + +// Set does nothing. +func (noOpGauge) Set(float64) {} + +// Add does nothing. +func (noOpGauge) Add(float64) {} + +// noOpHistogram is a no-op implementation of metrics.Histogram. +// All operations are compiled away by the Go compiler, providing +// zero runtime overhead when telemetry is disabled. +type noOpHistogram struct{} + +// NewHistogram returns a no-op Histogram. +func NewHistogram() metrics.Histogram { + return noOpHistogram{} +} + +// With returns the same no-op Histogram. +func (noOpHistogram) With(...string) metrics.Histogram { + return noOpHistogram{} +} + +// Observe does nothing. +func (noOpHistogram) Observe(float64) {} diff --git a/observability/metrics/discard/discard_test.go b/observability/metrics/discard/discard_test.go new file mode 100644 index 0000000000..a291e50718 --- /dev/null +++ b/observability/metrics/discard/discard_test.go @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: BUSL-1.1 +// +// Copyright (C) 2025, Berachain Foundation. All rights reserved. 
+// Use of this software is governed by the Business Source License included +// in the LICENSE file of this repository and at www.mariadb.com/bsl11. +// +// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY +// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER +// VERSIONS OF THE LICENSED WORK. +// +// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF +// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF +// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). +// +// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +// TITLE. + +package discard_test + +import ( + "testing" + + "github.com/berachain/beacon-kit/observability/metrics/discard" + "github.com/stretchr/testify/require" +) + +func TestNoOpCounter(t *testing.T) { + t.Parallel() + counter := discard.NewCounter() + + // All operations should complete without panic + counter.Add(1) + counter.Add(100) + + // With should return a counter (itself) + counter2 := counter.With("label", "value") + require.NotNil(t, counter2) + + counter2.Add(50) + + // Chaining should work + counter.With("label1", "value1").With("label2", "value2").Add(10) +} + +func TestNoOpGauge(t *testing.T) { + t.Parallel() + gauge := discard.NewGauge() + + // All operations should complete without panic + gauge.Set(42) + gauge.Add(10) + gauge.Add(-5) + + // With should return a gauge (itself) + gauge2 := gauge.With("label", "value") + require.NotNil(t, gauge2) + + gauge2.Set(100) + + // Chaining should work + gauge.With("label1", "value1").With("label2", "value2").Set(50) +} + +func TestNoOpHistogram(t *testing.T) { + t.Parallel() + histogram := discard.NewHistogram() + + // All operations should complete
without panic + histogram.Observe(0.5) + histogram.Observe(1.0) + histogram.Observe(10.0) + + // With should return a histogram (itself) + histogram2 := histogram.With("label", "value") + require.NotNil(t, histogram2) + + histogram2.Observe(2.5) + + // Chaining should work + histogram.With("label1", "value1").With("label2", "value2").Observe(3.3) +} + +// Benchmark to verify no-op operations have zero cost +func BenchmarkNoOpCounter(b *testing.B) { + counter := discard.NewCounter() + b.ResetTimer() + for range b.N { + counter.Add(1) + } +} + +func BenchmarkNoOpGauge(b *testing.B) { + gauge := discard.NewGauge() + b.ResetTimer() + for i := range b.N { + gauge.Set(float64(i)) + } +} + +func BenchmarkNoOpHistogram(b *testing.B) { + histogram := discard.NewHistogram() + b.ResetTimer() + for i := range b.N { + histogram.Observe(float64(i)) + } +} diff --git a/observability/metrics/lv/labelvalues.go b/observability/metrics/lv/labelvalues.go new file mode 100644 index 0000000000..2586bdbf53 --- /dev/null +++ b/observability/metrics/lv/labelvalues.go @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: BUSL-1.1 +// +// Copyright (C) 2025, Berachain Foundation. All rights reserved. +// Use of this software is governed by the Business Source License included +// in the LICENSE file of this repository and at www.mariadb.com/bsl11. +// +// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY +// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER +// VERSIONS OF THE LICENSED WORK. +// +// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF +// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF +// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). +// +// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +// AN "AS IS" BASIS. 
// LabelValues is a flat list of metric label key-value pairs, stored as
// alternating keys and values:
//
//	["key1", "value1", "key2", "value2", ...]
//
// Example:
//
//	var lvs lv.LabelValues
//	lvs = lvs.With("method", "GET")
//	lvs = lvs.With("status", "200")
//	// lvs now contains: ["method", "GET", "status", "200"]
type LabelValues []string

// With returns a LabelValues holding the receiver's pairs followed by
// labelValues. Neither the receiver nor the labelValues slice is written to.
//
// If labelValues has an odd length, "unknown" is added as the value of the
// final key so that every key has a value. With no arguments the receiver is
// returned unchanged.
//
// Example:
//
//	var lvs lv.LabelValues
//	lvs1 := lvs.With("method", "GET")
//	lvs2 := lvs1.With("status", "200")
//	// lvs1 and lvs2 are independent
func (lvs LabelValues) With(labelValues ...string) LabelValues {
	if len(labelValues) == 0 {
		return lvs
	}

	// 1 when the final key arrived without a value, 0 otherwise.
	pad := len(labelValues) % 2

	// Always build the result in a fresh backing array. Appending straight
	// onto lvs (or onto labelValues) would reuse spare capacity when present,
	// letting sibling results derived from the same parent overwrite each
	// other and letting the padding leak into the caller's slice.
	out := make(LabelValues, 0, len(lvs)+len(labelValues)+pad)
	out = append(out, lvs...)
	out = append(out, labelValues...)
	if pad != 0 {
		out = append(out, "unknown")
	}
	return out
}
+// +// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY +// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER +// VERSIONS OF THE LICENSED WORK. +// +// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF +// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF +// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). +// +// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +// TITLE. + +package lv_test + +import ( + "testing" + + "github.com/berachain/beacon-kit/observability/metrics/lv" + "github.com/stretchr/testify/require" +) + +func TestLabelValues_With(t *testing.T) { + t.Parallel() + tests := []struct { + name string + initial lv.LabelValues + input []string + expected lv.LabelValues + }{ + { + name: "empty to single pair", + initial: lv.LabelValues{}, + input: []string{"key1", "value1"}, + expected: lv.LabelValues{"key1", "value1"}, + }, + { + name: "append to existing", + initial: lv.LabelValues{"key1", "value1"}, + input: []string{"key2", "value2"}, + expected: lv.LabelValues{"key1", "value1", "key2", "value2"}, + }, + { + name: "multiple pairs at once", + initial: lv.LabelValues{}, + input: []string{"key1", "value1", "key2", "value2"}, + expected: lv.LabelValues{"key1", "value1", "key2", "value2"}, + }, + { + name: "odd number of values", + initial: lv.LabelValues{}, + input: []string{"key1", "value1", "key2"}, + expected: lv.LabelValues{"key1", "value1", "key2", "unknown"}, + }, + { + name: "empty input", + initial: lv.LabelValues{"key1", "value1"}, + input: []string{}, + expected: lv.LabelValues{"key1", "value1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := 
tt.initial.With(tt.input...) + require.Equal(t, tt.expected, result) + }) + } +} + +func TestLabelValues_Immutability(t *testing.T) { + t.Parallel() + // Test that With doesn't modify the original + original := lv.LabelValues{"key1", "value1"} + modified := original.With("key2", "value2") + + require.Equal(t, lv.LabelValues{"key1", "value1"}, original) + require.Equal(t, lv.LabelValues{"key1", "value1", "key2", "value2"}, modified) +} + +func TestLabelValues_Chaining(t *testing.T) { + t.Parallel() + // Test chaining multiple With calls + var lvs lv.LabelValues + lvs = lvs.With("method", "GET") + lvs = lvs.With("status", "200") + lvs = lvs.With("endpoint", "/api/v1/blocks") + + expected := lv.LabelValues{ + "method", "GET", + "status", "200", + "endpoint", "/api/v1/blocks", + } + require.Equal(t, expected, lvs) +} diff --git a/observability/metrics/metrics.go b/observability/metrics/metrics.go new file mode 100644 index 0000000000..c92e8ab385 --- /dev/null +++ b/observability/metrics/metrics.go @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: BUSL-1.1 +// +// Copyright (C) 2025, Berachain Foundation. All rights reserved. +// Use of this software is governed by the Business Source License included +// in the LICENSE file of this repository and at www.mariadb.com/bsl11. +// +// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY +// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER +// VERSIONS OF THE LICENSED WORK. +// +// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF +// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF +// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). +// +// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +// AN "AS IS" BASIS. 
// Counter represents a monotonically increasing metric.
type Counter interface {
	// Add increments the counter by the given delta.
	// Delta must be non-negative.
	Add(delta float64)

	// With returns a Counter with the given label values applied.
	// Label values are provided as alternating key-value pairs. How a
	// trailing key without a value is handled is up to the implementation.
	With(labelValues ...string) Counter
}

// Gauge represents a metric that can increase or decrease.
type Gauge interface {
	// Set sets the gauge to the given value.
	Set(value float64)

	// Add increments or decrements the gauge by the given delta.
	// Delta can be positive or negative.
	Add(delta float64)

	// With returns a Gauge with the given label values applied.
	// Label values are provided as alternating key-value pairs. How a
	// trailing key without a value is handled is up to the implementation.
	With(labelValues ...string) Gauge
}

// Histogram represents a metric that samples observations and counts them
// in configurable buckets. Histograms are used to track distributions of
// values such as request durations or response sizes.
type Histogram interface {
	// Observe adds a single observation to the histogram.
	Observe(value float64)

	// With returns a Histogram with the given label values applied.
	// Label values are provided as alternating key-value pairs. How a
	// trailing key without a value is handled is up to the implementation.
	With(labelValues ...string) Histogram
}

// CounterOpts defines options for creating a counter metric.
type CounterOpts struct {
	Subsystem string
	Name      string
	Help      string
}

// GaugeOpts defines options for creating a gauge metric.
type GaugeOpts struct {
	Subsystem string
	Name      string
	Help      string
}

// HistogramOpts defines options for creating a histogram metric.
type HistogramOpts struct {
	Subsystem string
	Name      string
	Help      string
	Buckets   []float64
}

// Factory creates metric instances (Counter, Gauge, Histogram).
// The implementations in this module are prometheus.Factory and
// discard.Factory, found in the sub-packages of the same names.
type Factory interface {
	// NewCounter creates a new Counter with the given options and label names.
	NewCounter(opts CounterOpts, labelNames []string) Counter

	// NewGauge creates a new Gauge with the given options and label names.
	NewGauge(opts GaugeOpts, labelNames []string) Gauge

	// NewHistogram creates a new Histogram with the given options and label names.
	NewHistogram(opts HistogramOpts, labelNames []string) Histogram
}
LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +// TITLE. + +package prometheus + +import ( + "github.com/berachain/beacon-kit/observability/metrics" + "github.com/berachain/beacon-kit/observability/metrics/lv" + "github.com/prometheus/client_golang/prometheus" +) + +// Factory creates Prometheus metrics and registers them with prometheus.DefaultRegisterer. +type Factory struct { + namespace string +} + +// NewFactory creates a new Prometheus metrics factory with the given namespace. +// +// Example: +// +// factory := prometheus.NewFactory("beacon_kit") +// counter := factory.NewCounter(metrics.CounterOpts{ +// Subsystem: "blockchain", +// Name: "blocks_total", +// Help: "Total number of blocks processed", +// }, []string{"status"}) +func NewFactory(namespace string) metrics.Factory { + return &Factory{namespace: namespace} +} + +// NewCounter creates a new Counter that registers with prometheus.DefaultRegisterer. +func (f *Factory) NewCounter(opts metrics.CounterOpts, labelNames []string) metrics.Counter { + return NewCounter(prometheus.CounterOpts{ + Namespace: f.namespace, + Subsystem: opts.Subsystem, + Name: opts.Name, + Help: opts.Help, + }, labelNames) +} + +// NewGauge creates a new Gauge that registers with prometheus.DefaultRegisterer. +func (f *Factory) NewGauge(opts metrics.GaugeOpts, labelNames []string) metrics.Gauge { + return NewGauge(prometheus.GaugeOpts{ + Namespace: f.namespace, + Subsystem: opts.Subsystem, + Name: opts.Name, + Help: opts.Help, + }, labelNames) +} + +// NewHistogram creates a new Histogram that registers with prometheus.DefaultRegisterer. 
+func (f *Factory) NewHistogram(opts metrics.HistogramOpts, labelNames []string) metrics.Histogram { + return NewHistogram(prometheus.HistogramOpts{ + Namespace: f.namespace, + Subsystem: opts.Subsystem, + Name: opts.Name, + Help: opts.Help, + Buckets: opts.Buckets, + }, labelNames) +} + +// counter wraps a prometheus.CounterVec and implements the metrics.Counter interface. +type counter struct { + cv *prometheus.CounterVec + lvs lv.LabelValues +} + +// NewCounter creates a new Counter that registers with prometheus.DefaultRegisterer. +// +// Example: +// +// c := prometheus.NewCounter(prometheus.CounterOpts{ +// Namespace: "beacon_kit", +// Subsystem: "blockchain", +// Name: "total_blocks", +// Help: "Total number of blocks processed", +// }, []string{"status"}) +func NewCounter(opts prometheus.CounterOpts, labelNames []string) metrics.Counter { + cv := prometheus.NewCounterVec(opts, labelNames) + prometheus.MustRegister(cv) + return &counter{cv: cv} +} + +// NewCounterFrom creates a new Counter from an existing prometheus.CounterVec. +// The CounterVec must already be registered. +func NewCounterFrom(cv *prometheus.CounterVec, labelValues ...string) metrics.Counter { + return &counter{ + cv: cv, + lvs: lv.LabelValues(labelValues), + } +} + +// With returns a new Counter with the given label values applied. +func (c *counter) With(labelValues ...string) metrics.Counter { + return &counter{ + cv: c.cv, + lvs: c.lvs.With(labelValues...), + } +} + +// Add increments the counter by the given delta. +func (c *counter) Add(delta float64) { + c.cv.With(makeLabels(c.lvs...)).Add(delta) +} + +// gauge wraps a prometheus.GaugeVec and implements the metrics.Gauge interface. +type gauge struct { + gv *prometheus.GaugeVec + lvs lv.LabelValues +} + +// NewGauge creates a new Gauge that registers with prometheus.DefaultRegisterer. 
+//
+// The vector is registered with prometheus.MustRegister, which panics if
+// registration fails.
+//
+// Example:
+//
+//	g := prometheus.NewGauge(prometheus.GaugeOpts{
+//		Namespace: "beacon_kit",
+//		Subsystem: "blockchain",
+//		Name:      "queue_depth",
+//		Help:      "Current depth of the processing queue",
+//	}, []string{"queue_name"})
+func NewGauge(opts prometheus.GaugeOpts, labelNames []string) metrics.Gauge {
+	gv := prometheus.NewGaugeVec(opts, labelNames)
+	prometheus.MustRegister(gv)
+	return &gauge{gv: gv}
+}
+
+// NewGaugeFrom creates a new Gauge from an existing prometheus.GaugeVec.
+// The GaugeVec must already be registered.
+//
+// labelValues, if given, are stored as-is as the gauge's initial label pairs;
+// they are read as alternating keys and values when the gauge is used.
+func NewGaugeFrom(gv *prometheus.GaugeVec, labelValues ...string) metrics.Gauge {
+	return &gauge{
+		gv:  gv,
+		lvs: lv.LabelValues(labelValues),
+	}
+}
+
+// With returns a new Gauge with the given label values applied.
+// The new gauge shares the receiver's GaugeVec.
+func (g *gauge) With(labelValues ...string) metrics.Gauge {
+	return &gauge{
+		gv:  g.gv,
+		lvs: g.lvs.With(labelValues...),
+	}
+}
+
+// Set sets the gauge to the given value.
+// The child metric is looked up from the accumulated label pairs on each call.
+func (g *gauge) Set(value float64) {
+	g.gv.With(makeLabels(g.lvs...)).Set(value)
+}
+
+// Add increments or decrements the gauge by the given delta.
+// The child metric is looked up from the accumulated label pairs on each call.
+func (g *gauge) Add(delta float64) {
+	g.gv.With(makeLabels(g.lvs...)).Add(delta)
+}
+
+// histogram wraps a prometheus.HistogramVec and implements the metrics.Histogram interface.
+type histogram struct {
+	// hv is the underlying vector; children are resolved from it on every Observe.
+	hv *prometheus.HistogramVec
+	// lvs holds the label key/value pairs accumulated through With.
+	lvs lv.LabelValues
+}
+
+// NewHistogram creates a new Histogram that registers with prometheus.DefaultRegisterer.
+//
+// The vector is registered with prometheus.MustRegister, which panics if
+// registration fails.
+//
+// Example:
+//
+//	h := prometheus.NewHistogram(prometheus.HistogramOpts{
+//		Namespace: "beacon_kit",
+//		Subsystem: "blockchain",
+//		Name:      "block_processing_duration_seconds",
+//		Help:      "Time spent processing blocks",
+//		Buckets:   prometheus.ExponentialBucketsRange(0.001, 10, 10),
+//	}, []string{"block_type"})
+func NewHistogram(opts prometheus.HistogramOpts, labelNames []string) metrics.Histogram {
+	hv := prometheus.NewHistogramVec(opts, labelNames)
+	prometheus.MustRegister(hv)
+	return &histogram{hv: hv}
+}
+
+// NewHistogramFrom creates a new Histogram from an existing prometheus.HistogramVec.
+// The HistogramVec must already be registered.
+//
+// labelValues, if given, are stored as-is as the histogram's initial label
+// pairs; they are read as alternating keys and values when the histogram is used.
+func NewHistogramFrom(hv *prometheus.HistogramVec, labelValues ...string) metrics.Histogram {
+	return &histogram{
+		hv:  hv,
+		lvs: lv.LabelValues(labelValues),
+	}
+}
+
+// With returns a new Histogram with the given label values applied.
+// The new histogram shares the receiver's HistogramVec.
+func (h *histogram) With(labelValues ...string) metrics.Histogram {
+	return &histogram{
+		hv:  h.hv,
+		lvs: h.lvs.With(labelValues...),
+	}
+}
+
+// Observe adds a single observation to the histogram.
+// The child metric is looked up from the accumulated label pairs on each call.
+func (h *histogram) Observe(value float64) {
+	h.hv.With(makeLabels(h.lvs...)).Observe(value)
+}
+
+// makeLabels converts a slice of label key-value pairs into a prometheus.Labels map.
+// The input slice should contain alternating keys and values.
+//
+// A trailing key without a value is paired with "unknown" instead of indexing
+// past the end of the slice: NewCounterFrom, NewGaugeFrom and NewHistogramFrom
+// store their labelValues unmodified, so an odd-length input can reach here.
+// NOTE(review): "unknown" follows the go-kit lv convention; confirm it matches
+// what lv.LabelValues.With pads with.
+//
+//nolint:mnd // 2 is for key-value pairs
+func makeLabels(labelValues ...string) prometheus.Labels {
+	labels := make(prometheus.Labels, (len(labelValues)+1)/2)
+	for i := 0; i < len(labelValues); i += 2 {
+		value := "unknown"
+		if i+1 < len(labelValues) {
+			value = labelValues[i+1]
+		}
+		labels[labelValues[i]] = value
+	}
+	return labels
+}
diff --git a/observability/metrics/prometheus/prometheus_test.go b/observability/metrics/prometheus/prometheus_test.go
new file mode 100644
index 0000000000..334ddb3552
--- /dev/null
+++ b/observability/metrics/prometheus/prometheus_test.go
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: BUSL-1.1
+//
+// Copyright (C) 2025, Berachain Foundation.
All rights reserved.
+// Use of this software is governed by the Business Source License included
+// in the LICENSE file of this repository and at www.mariadb.com/bsl11.
+//
+// ANY USE OF THE LICENSED WORK IN VIOLATION OF THIS LICENSE WILL AUTOMATICALLY
+// TERMINATE YOUR RIGHTS UNDER THIS LICENSE FOR THE CURRENT AND ALL OTHER
+// VERSIONS OF THE LICENSED WORK.
+//
+// THIS LICENSE DOES NOT GRANT YOU ANY RIGHT IN ANY TRADEMARK OR LOGO OF
+// LICENSOR OR ITS AFFILIATES (PROVIDED THAT YOU MAY USE A TRADEMARK OR LOGO OF
+// LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE).
+//
+// TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
+// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
+// EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
+// TITLE.
+
+package prometheus_test
+
+import (
+	"testing"
+
+	bkprometheus "github.com/berachain/beacon-kit/observability/metrics/prometheus"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	"github.com/stretchr/testify/require"
+)
+
+// TestCounter checks that Add accumulates per label value and that distinct
+// label values produce distinct series.
+func TestCounter(t *testing.T) {
+	t.Parallel()
+	// Create a new registry for isolated testing
+	reg := prometheus.NewRegistry()
+
+	cv := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "test",
+		Subsystem: "counter",
+		Name:      "total",
+		Help:      "Test counter",
+	}, []string{"label1"})
+	reg.MustRegister(cv)
+
+	counter := bkprometheus.NewCounterFrom(cv)
+
+	// Test With and Add
+	counter.With("label1", "value1").Add(5)
+	counter.With("label1", "value1").Add(2)
+	counter.With("label1", "value2").Add(10)
+
+	// Gather metrics
+	metrics, err := reg.Gather()
+	require.NoError(t, err)
+	require.Len(t, metrics, 1)
+
+	// Verify metric family
+	mf := metrics[0]
+	require.Equal(t, "test_counter_total", mf.GetName())
+	require.Equal(t, dto.MetricType_COUNTER, mf.GetType())
+
+	// Verify we have metrics with different labels
+	require.Len(t, mf.GetMetric(), 2)
+
+	// Verify the accumulated value of each series, keyed by label value so the
+	// check does not depend on the order in which Gather returns them.
+	got := make(map[string]float64, len(mf.GetMetric()))
+	for _, m := range mf.GetMetric() {
+		require.Len(t, m.GetLabel(), 1)
+		got[m.GetLabel()[0].GetValue()] = m.GetCounter().GetValue()
+	}
+	require.InDelta(t, float64(7), got["value1"], 0.001)
+	require.InDelta(t, float64(10), got["value2"], 0.001)
+}
+
+// TestGauge checks that Set followed by positive and negative Add yields the
+// expected final value on a single series.
+func TestGauge(t *testing.T) {
+	t.Parallel()
+	// Create a new registry for isolated testing
+	reg := prometheus.NewRegistry()
+
+	gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: "test",
+		Subsystem: "gauge",
+		Name:      "current",
+		Help:      "Test gauge",
+	}, []string{"label1"})
+	reg.MustRegister(gv)
+
+	gauge := bkprometheus.NewGaugeFrom(gv)
+
+	// Test Set
+	gauge.With("label1", "value1").Set(42)
+
+	// Test Add (increment)
+	gauge.With("label1", "value1").Add(8)
+
+	// Test Add (decrement)
+	gauge.With("label1", "value1").Add(-10)
+
+	// Gather metrics
+	metrics, err := reg.Gather()
+	require.NoError(t, err)
+	require.Len(t, metrics, 1)
+
+	// Verify metric family
+	mf := metrics[0]
+	require.Equal(t, "test_gauge_current", mf.GetName())
+	require.Equal(t, dto.MetricType_GAUGE, mf.GetType())
+
+	// Verify value (42 + 8 - 10 = 40)
+	require.Len(t, mf.GetMetric(), 1)
+	require.InDelta(t, float64(40), mf.GetMetric()[0].GetGauge().GetValue(), 0.001)
+}
+
+// TestHistogram checks sample count, sample sum and cumulative bucket counts
+// after three observations on a single series.
+func TestHistogram(t *testing.T) {
+	t.Parallel()
+	// Create a new registry for isolated testing
+	reg := prometheus.NewRegistry()
+
+	hv := prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: "test",
+		Subsystem: "histogram",
+		Name:      "duration_seconds",
+		Help:      "Test histogram",
+		Buckets:   []float64{0.1, 0.5, 1.0, 5.0},
+	}, []string{"label1"})
+	reg.MustRegister(hv)
+
+	histogram := bkprometheus.NewHistogramFrom(hv)
+
+	// Test Observe
+	histogram.With("label1", "value1").Observe(0.3)
+	histogram.With("label1", "value1").Observe(0.7)
+	histogram.With("label1", "value1").Observe(2.0)
+
+	// Gather metrics
+	metrics, err := reg.Gather()
+	require.NoError(t, err)
+	require.Len(t, metrics, 1)
+
+	// Verify metric family
+	mf := metrics[0]
+	require.Equal(t, "test_histogram_duration_seconds", mf.GetName())
+	require.Equal(t, dto.MetricType_HISTOGRAM, mf.GetType())
+
+	// Verify histogram properties
+	require.Len(t, mf.GetMetric(), 1)
+	h := mf.GetMetric()[0].GetHistogram()
+	require.Equal(t, uint64(3), h.GetSampleCount())
+	require.InDelta(t, 3.0, h.GetSampleSum(), 0.001)
+
+	// Verify buckets
+	// Observations: 0.3, 0.7, 2.0
+	// Buckets: 0.1, 0.5, 1.0, 5.0
+	require.Len(t, h.GetBucket(), 4)
+	require.Equal(t, uint64(0), h.GetBucket()[0].GetCumulativeCount()) // <= 0.1: none
+	require.Equal(t, uint64(1), h.GetBucket()[1].GetCumulativeCount()) // <= 0.5: 0.3
+	require.Equal(t, uint64(2), h.GetBucket()[2].GetCumulativeCount()) // <= 1.0: 0.3, 0.7
+	require.Equal(t, uint64(3), h.GetBucket()[3].GetCumulativeCount()) // <= 5.0: 0.3, 0.7, 2.0
+}
+
+// TestCounterLabelChaining checks that label pairs supplied through successive
+// With calls are all applied to the resulting series.
+func TestCounterLabelChaining(t *testing.T) {
+	t.Parallel()
+	// Create a new registry for isolated testing
+	reg := prometheus.NewRegistry()
+
+	cv := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "test",
+		Subsystem: "counter",
+		Name:      "chaining",
+		Help:      "Test counter chaining",
+	}, []string{"label1", "label2"})
+	reg.MustRegister(cv)
+
+	counter := bkprometheus.NewCounterFrom(cv)
+
+	// Test chaining With calls
+	counter.With("label1", "value1").With("label2", "value2").Add(5)
+
+	// Gather metrics
+	metrics, err := reg.Gather()
+	require.NoError(t, err)
+	require.Len(t, metrics, 1)
+
+	mf := metrics[0]
+	require.Len(t, mf.GetMetric(), 1)
+	require.InDelta(t, float64(5), mf.GetMetric()[0].GetCounter().GetValue(), 0.001)
+
+	// Verify labels: both the count and the exact name/value pairs.
+	labels := mf.GetMetric()[0].GetLabel()
+	require.Len(t, labels, 2)
+	got := make(map[string]string, len(labels))
+	for _, l := range labels {
+		got[l.GetName()] = l.GetValue()
+	}
+	require.Equal(t, map[string]string{"label1": "value1", "label2": "value2"}, got)
+}
diff --git a/state-transition/core/interfaces.go b/state-transition/core/interfaces.go
index 585d4243e3..5437710b2d 100644
--- a/state-transition/core/interfaces.go
+++ b/state-transition/core/interfaces.go
@@ -63,14 +63,6 @@ type ExecutionEngine interface {
 	) error
 }
 
-// TelemetrySink is an interface for sending metrics to a telemetry backend.
-type TelemetrySink interface {
-	SetGauge(key string, value int64, args ...string)
-	// IncrementCounter increments the counter identified by
-	// the provided key.
-	IncrementCounter(key string, args ...string)
-}
-
 type ChainSpec interface {
 	chain.HysteresisSpec
 	chain.BalancesSpec
diff --git a/state-transition/core/metrics.go b/state-transition/core/metrics.go
index 48ca3d8418..cde0690a62 100644
--- a/state-transition/core/metrics.go
+++ b/state-transition/core/metrics.go
@@ -13,7 +13,7 @@
 // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE).
 //
 // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
-// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
+// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
 // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
 // TITLE.
@@ -21,60 +21,122 @@
 package core
 
 import (
+	"github.com/berachain/beacon-kit/observability/metrics"
 	"github.com/berachain/beacon-kit/primitives/math"
 )
 
-type stateProcessorMetrics struct {
-	// sink is the sink for the metrics.
-	sink TelemetrySink
+// Metrics contains metrics for the state processor.
+type Metrics struct {
+	BlockTxGasUsed                  metrics.Gauge
+	BlockBlobGasUsed                metrics.Gauge
+	PartialWithdrawalsEnqueued      metrics.Gauge
+	PayloadConsensusTimestampDiff   metrics.Gauge
+	DepositStakeLost                metrics.Counter
+	PartialWithdrawalRequestDropped metrics.Counter
+	PartialWithdrawalRequestInvalid metrics.Counter
+	ValidatorNotWithdrawable        metrics.Counter
 }
 
-// newStateProcessorMetrics creates a new stateProcessorMetrics.
-func newStateProcessorMetrics(sink TelemetrySink) *stateProcessorMetrics {
-	return &stateProcessorMetrics{
-		sink: sink,
+// NewMetrics returns a new Metrics instance.
+// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility.
+//
+// The block gas gauges are created without labels. A block_number label would
+// add one new, never-removed time series per block, so each gauge instead
+// reports the value of the most recently processed block.
+func NewMetrics(factory metrics.Factory) *Metrics {
+	return &Metrics{
+		BlockTxGasUsed: factory.NewGauge(
+			metrics.GaugeOpts{
+				Subsystem: "state",
+				Name:      "block_tx_gas_used",
+				Help:      "Transaction gas used in the block",
+			},
+			nil,
+		),
+		BlockBlobGasUsed: factory.NewGauge(
+			metrics.GaugeOpts{
+				Subsystem: "state",
+				Name:      "block_blob_gas_used",
+				Help:      "Blob gas used in the block",
+			},
+			nil,
+		),
+		PartialWithdrawalsEnqueued: factory.NewGauge(
+			metrics.GaugeOpts{
+				Subsystem: "state",
+				Name:      "partial_withdrawals_enqueued",
+				Help:      "Number of partial withdrawals enqueued",
+			},
+			nil,
+		),
+		PayloadConsensusTimestampDiff: factory.NewGauge(
+			metrics.GaugeOpts{
+				Subsystem: "state",
+				Name:      "payload_consensus_timestamp_diff",
+				Help:      "Difference between payload timestamp and consensus timestamp",
+			},
+			nil,
+		),
+		DepositStakeLost: factory.NewCounter(
+			metrics.CounterOpts{
+				Subsystem: "state",
+				Name:      "deposit_stake_lost",
+				Help:      "Number of deposits with stake lost",
+			},
+			nil,
+		),
+		PartialWithdrawalRequestDropped: factory.NewCounter(
+			metrics.CounterOpts{
+				Subsystem: "state",
+				Name:      "partial_withdrawal_request_dropped",
+				Help:      "Number of partial withdrawal requests dropped",
+			},
+			nil,
+		),
+		PartialWithdrawalRequestInvalid: factory.NewCounter(
+			metrics.CounterOpts{
+				Subsystem: "state",
+				Name:      "partial_withdrawal_request_invalid",
+				Help:      "Number of invalid partial withdrawal requests",
+			},
+			nil,
+		),
+		ValidatorNotWithdrawable: factory.NewCounter(
+			metrics.CounterOpts{
+				Subsystem: "state",
+				Name:      "validator_not_withdrawable",
+				Help:      "Number of validators not withdrawable",
+			},
+			nil,
+		),
 	}
 }
 
-func (s *stateProcessorMetrics) gaugeBlockGasUsed(blockNumber, txGasUsed, blobGasUsed math.U64) {
-	blockNumberStr := blockNumber.Base10()
-	s.sink.SetGauge(
-		"beacon_kit.state.block_tx_gas_used",
-		int64(txGasUsed.Unwrap()), // #nosec G115
-		"block_number",
-		blockNumberStr,
-	)
-	s.sink.SetGauge(
-		"beacon_kit.state.block_blob_gas_used",
-		int64(blobGasUsed.Unwrap()), // #nosec G115
-		"block_number",
-		blockNumberStr,
-	)
+// gaugeBlockGasUsed records the transaction and blob gas used by a block.
+//
+// The block number parameter is kept for call-site compatibility but is not
+// used as a label: with the Prometheus client every distinct label value is a
+// separate series that is never deleted, so labelling by block number would
+// grow memory and scrape size for as long as the node runs.
+func (m *Metrics) gaugeBlockGasUsed(_, txGasUsed, blobGasUsed math.U64) {
+	m.BlockTxGasUsed.Set(float64(txGasUsed.Unwrap()))
+	m.BlockBlobGasUsed.Set(float64(blobGasUsed.Unwrap()))
 }
 
-func (s *stateProcessorMetrics) gaugePartialWithdrawalsEnqueued(count int) {
-	s.sink.SetGauge("beacon_kit.state.partial_withdrawals_enqueued", int64(count))
+// gaugePartialWithdrawalsEnqueued sets the partial-withdrawals gauge to count.
+func (m *Metrics) gaugePartialWithdrawalsEnqueued(count int) {
+	m.PartialWithdrawalsEnqueued.Set(float64(count))
 }
 
-func (s *stateProcessorMetrics) gaugeTimestamps(payloadTimestamp, consensusTimestamp uint64) {
+// gaugeTimestamps records payloadTimestamp minus consensusTimestamp.
+func (m *Metrics) gaugeTimestamps(payloadTimestamp, consensusTimestamp uint64) {
 	// the diff can be positive or negative depending on whether the payload
 	// timestamp is ahead or behind the consensus timestamp
 	diff := int64(payloadTimestamp) - int64(consensusTimestamp) // #nosec G115
-	s.sink.SetGauge("beacon_kit.state.payload_consensus_timestamp_diff", diff)
+	m.PayloadConsensusTimestampDiff.Set(float64(diff))
 }
 
-func (s *stateProcessorMetrics) incrementDepositStakeLost() {
-	s.sink.IncrementCounter("beacon_kit.state.deposit_stake_lost")
+// incrementDepositStakeLost adds one to the deposit-stake-lost counter.
+func (m *Metrics) incrementDepositStakeLost() {
+	m.DepositStakeLost.Add(1)
 }
 
-func (s *stateProcessorMetrics) incrementPartialWithdrawalRequestDropped() {
-	s.sink.IncrementCounter("beacon_kit.state.partial_withdrawal_request_dropped")
+// incrementPartialWithdrawalRequestDropped adds one to the dropped-request counter.
+func (m *Metrics) incrementPartialWithdrawalRequestDropped() {
+	m.PartialWithdrawalRequestDropped.Add(1)
 }
 
-func (s *stateProcessorMetrics) incrementPartialWithdrawalRequestInvalid() {
-	s.sink.IncrementCounter("beacon_kit.state.partial_withdrawal_request_invalid")
+// incrementPartialWithdrawalRequestInvalid adds one to the invalid-request counter.
+func (m *Metrics) incrementPartialWithdrawalRequestInvalid() {
+	m.PartialWithdrawalRequestInvalid.Add(1)
 }
 
-func (s *stateProcessorMetrics) incrementValidatorNotWithdrawable() {
-	s.sink.IncrementCounter("beacon_kit.state.validator_not_withdrawable")
+// incrementValidatorNotWithdrawable adds one to the not-withdrawable counter.
+func (m *Metrics) incrementValidatorNotWithdrawable() {
+	m.ValidatorNotWithdrawable.Add(1)
 }
diff --git a/state-transition/core/state/interfaces.go b/state-transition/core/state/interfaces.go
index 646216b8fc..30918a5416 100644
--- a/state-transition/core/state/interfaces.go
+++ b/state-transition/core/state/interfaces.go
@@ -36,8 +36,3 @@ type ChainSpec interface {
 	ActiveForkVersionForTimestamp(timestamp math.U64) common.Version
 	MinActivationBalance() math.Gwei
 }
-
-// TelemetrySink is an interface for sending metrics to a telemetry backend.
-type TelemetrySink interface {
-	IncrementCounter(key string, args ...string)
-}
diff --git a/state-transition/core/state/metrics.go b/state-transition/core/state/metrics.go
index 132b013265..73089d378a 100644
--- a/state-transition/core/state/metrics.go
+++ b/state-transition/core/state/metrics.go
@@ -13,19 +13,57 @@
 // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE).
 //
 // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
-// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
+// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
 // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
 // TITLE.
 
 package state
 
+import (
+	"github.com/berachain/beacon-kit/observability/metrics"
+)
+
+// Metrics is a struct that contains metrics for the StateDB.
+type Metrics struct {
+	// PartialWithdrawalRequestInvalid tracks invalid partial withdrawal requests
+	PartialWithdrawalRequestInvalid metrics.Counter
+
+	// ExcessStakePartialWithdrawal tracks withdrawals due to excess stake
+	ExcessStakePartialWithdrawal metrics.Counter
+}
+
+// NewMetrics returns a new Metrics instance.
+// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility.
+//
+// Both counters are created under the "statedb" subsystem with no labels.
+func NewMetrics(factory metrics.Factory) *Metrics {
+	return &Metrics{
+		PartialWithdrawalRequestInvalid: factory.NewCounter(
+			metrics.CounterOpts{
+				Subsystem: "statedb",
+				Name:      "partial_withdrawal_request_invalid",
+				Help:      "Number of invalid partial withdrawal requests",
+			},
+			// no label names
+			nil,
+		),
+		ExcessStakePartialWithdrawal: factory.NewCounter(
+			metrics.CounterOpts{
+				Subsystem: "statedb",
+				Name:      "excess_stake_partial_withdrawal",
+				Help:      "Number of withdrawals created due to validator stake exceeding MaxEffectiveBalance",
+			},
+			// no label names
+			nil,
+		),
+	}
+}
+
+// incrementPartialWithdrawalRequestInvalid increments the counter for invalid
+// partial withdrawal requests.
 func (s *StateDB) incrementPartialWithdrawalRequestInvalid() {
-	s.telemetrySink.IncrementCounter("beacon_kit.statedb.partial_withdrawal_request_invalid")
+	s.metrics.PartialWithdrawalRequestInvalid.Add(1)
 }
 
-// incrementExcessStakePartialWithdrawal increments the telemetry counter when a withdrawal is created
-// because a validator's stake went over the MaxEffectiveBalance.
+// incrementExcessStakePartialWithdrawal increments the counter when a withdrawal
+// is created because a validator's stake went over the MaxEffectiveBalance.
 func (s *StateDB) incrementExcessStakePartialWithdrawal() {
-	s.telemetrySink.IncrementCounter("beacon_kit.statedb.excess_stake_partial_withdrawal")
+	s.metrics.ExcessStakePartialWithdrawal.Add(1)
 }
diff --git a/state-transition/core/state/statedb.go b/state-transition/core/state/statedb.go
index 95710d9992..a9c18d3a33 100644
--- a/state-transition/core/state/statedb.go
+++ b/state-transition/core/state/statedb.go
@@ -40,20 +40,20 @@ import (
 type StateDB struct {
 	beacondb.KVStore
 
-	cs            ChainSpec
-	logger        log.Logger
-	telemetrySink TelemetrySink
+	cs      ChainSpec
+	logger  log.Logger
+	metrics *Metrics
 }
 
 // NewBeaconStateFromDB creates a new beacon state from an underlying state db.
func NewBeaconStateFromDB( - bdb *beacondb.KVStore, cs ChainSpec, logger log.Logger, telemetrySink TelemetrySink, + bdb *beacondb.KVStore, cs ChainSpec, logger log.Logger, metrics *Metrics, ) *StateDB { return &StateDB{ - KVStore: *bdb, - cs: cs, - logger: logger, - telemetrySink: telemetrySink, + KVStore: *bdb, + cs: cs, + logger: logger, + metrics: metrics, } } @@ -64,7 +64,7 @@ func NewBeaconStateFromDB( // So write operations to the top cache won't be flushed to the lower layer but read operations will walk // through the cache stack, so bubbling up changes from the lower layers to the top ones. func (s *StateDB) Protect(ctx context.Context) *StateDB { - return NewBeaconStateFromDB(s.KVStore.Copy(ctx), s.cs, s.logger, s.telemetrySink) + return NewBeaconStateFromDB(s.KVStore.Copy(ctx), s.cs, s.logger, s.metrics) } // GetEpoch returns the current epoch. diff --git a/state-transition/core/state/statedb_test.go b/state-transition/core/state/statedb_test.go index cba8ec6b48..3feb1bd6d5 100644 --- a/state-transition/core/state/statedb_test.go +++ b/state-transition/core/state/statedb_test.go @@ -30,7 +30,7 @@ import ( storetypes "cosmossdk.io/store/types" "github.com/berachain/beacon-kit/config/spec" ctypes "github.com/berachain/beacon-kit/consensus-types/types" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/math" "github.com/berachain/beacon-kit/state-transition/core/state" @@ -69,7 +69,7 @@ func TestStateProtect(t *testing.T) { kvStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), - metrics.NewNoOpTelemetrySink(), + state.NewMetrics(discard.NewFactory()), ) protectingState := originalState.Protect(sdkCtx) diff --git a/state-transition/core/state_processor.go b/state-transition/core/state_processor.go index f8aebb18f3..e8eb8c3f0f 100644 --- a/state-transition/core/state_processor.go +++ 
b/state-transition/core/state_processor.go @@ -55,7 +55,7 @@ type StateProcessor struct { // ds allows checking payload deposits against the deposit contract ds deposit.StoreManager // metrics is the metrics for the service. - metrics *stateProcessorMetrics + metrics *Metrics // logDeneb1Once enforces logging the Deneb1 fork information at most once. logDeneb1Once sync.Once } @@ -68,7 +68,7 @@ func NewStateProcessor( ds deposit.StoreManager, signer crypto.BLSSigner, fGetAddressFromPubKey func(crypto.BLSPubkey) ([]byte, error), - telemetrySink TelemetrySink, + metrics *Metrics, ) *StateProcessor { return &StateProcessor{ logger: logger, @@ -77,7 +77,7 @@ func NewStateProcessor( signer: signer, fGetAddressFromPubKey: fGetAddressFromPubKey, ds: ds, - metrics: newStateProcessorMetrics(telemetrySink), + metrics: metrics, } } diff --git a/state-transition/core/state_processor_payload_test.go b/state-transition/core/state_processor_payload_test.go index 75df073418..d20e04bd49 100644 --- a/state-transition/core/state_processor_payload_test.go +++ b/state-transition/core/state_processor_payload_test.go @@ -30,7 +30,7 @@ import ( storetypes "cosmossdk.io/store/types" payloadtime "github.com/berachain/beacon-kit/beacon/payload-time" "github.com/berachain/beacon-kit/consensus-types/types" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/constants" "github.com/berachain/beacon-kit/primitives/math" @@ -129,7 +129,7 @@ func TestPayloadTimestampVerification(t *testing.T) { // create independent states per each test sdkCtx := sdk.NewContext(cms.CacheMultiStore(), true, log.NewNopLogger()) testSt := statedb.NewBeaconStateFromDB( - st.KVStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), metrics.NewNoOpTelemetrySink(), + st.KVStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), 
statedb.NewMetrics(discard.NewFactory()), ) tCtx := transition.NewTransitionCtx( diff --git a/storage/beacondb/registry_test.go b/storage/beacondb/registry_test.go index 720b537021..fd06768063 100644 --- a/storage/beacondb/registry_test.go +++ b/storage/beacondb/registry_test.go @@ -28,7 +28,6 @@ import ( corestore "cosmossdk.io/core/store" "cosmossdk.io/log" "cosmossdk.io/store" - "cosmossdk.io/store/metrics" storetypes "cosmossdk.io/store/types" "github.com/berachain/beacon-kit/consensus-types/types" "github.com/berachain/beacon-kit/primitives/bytes" @@ -296,15 +295,11 @@ func initTestStore() (*beacondb.KVStore, error) { if err != nil { return nil, fmt.Errorf("failed opening mem db: %w", err) } - var ( - nopLog = log.NewNopLogger() - nopMetrics = metrics.NewNoOpMetrics() - ) - + nopLog := log.NewNopLogger() cms := store.NewCommitMultiStore( db, nopLog, - nopMetrics, + storage.NoOpStoreMetrics{}, ) cms.MountStoreWithDB(testStoreKey, storetypes.StoreTypeIAVL, nil) diff --git a/node-core/components/telemetry_sink.go b/storage/noop_metrics.go similarity index 62% rename from node-core/components/telemetry_sink.go rename to storage/noop_metrics.go index f9ecd96f2e..b986e8d30d 100644 --- a/node-core/components/telemetry_sink.go +++ b/storage/noop_metrics.go @@ -13,16 +13,18 @@ // LICENSOR AS EXPRESSLY REQUIRED BY THIS LICENSE). // // TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON -// AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +// AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, // EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND // TITLE. -package components +package storage -import "github.com/berachain/beacon-kit/node-core/components/metrics" +// NoOpStoreMetrics is a no-op implementation of cosmossdk.io/store/types.StoreMetrics. 
+// By providing our own implementation, we avoid directly importing cosmossdk.io/store/metrics, +// which has transitive dependencies on go-metrics packages +type NoOpStoreMetrics struct{} -// ProvideTelemetrySink is a function that provides a TelemetrySink. -func ProvideTelemetrySink() *metrics.TelemetrySink { - return &metrics.TelemetrySink{} -} +// MeasureSince is a no-op implementation that does nothing. +// This avoids time.Now() calls and metric recording overhead. +func (NoOpStoreMetrics) MeasureSince(...string) {} diff --git a/testing/networks/80069/app.toml b/testing/networks/80069/app.toml index 09ac280065..b82f404379 100644 --- a/testing/networks/80069/app.toml +++ b/testing/networks/80069/app.toml @@ -72,44 +72,12 @@ app-db-backend = "pebbledb" [telemetry] -# Prefixed with keys to separate services. -service-name = "beacond_node" - -# Enabled enables the application telemetry functionality. When enabled, -# an in-memory sink is also enabled by default. Operators may also enabled -# other sinks such as Prometheus. +# Enabled enables Prometheus metrics collection for all beacon-kit services. +# When true, metrics are exposed at the configured Prometheus endpoint. +# When false, all metrics are no-op with zero runtime overhead. +# Default: false enabled = true -# Enable prefixing gauge values with hostname. -enable-hostname = true - -# Enable adding hostname to labels. -enable-hostname-label = true - -# Enable adding service to labels. -enable-service-label = true - -# PrometheusRetentionTime, when positive, enables a Prometheus metrics sink. -prometheus-retention-time = 60 - -# GlobalLabels defines a global set of name/value label tuples applied to all -# metrics emitted using the wrapper functions defined in telemetry package. -# -# Example: -# [["chain_id", "cosmoshub-1"]] -global-labels = [] - -# MetricsSink defines the type of metrics sink to use. -metrics-sink = "" - -# StatsdAddr defines the address of a statsd server to send metrics to. 
-# Only utilized if MetricsSink is set to "statsd" or "dogstatsd". -statsd-addr = "" - -# DatadogHostname defines the hostname to use when emitting metrics to -# Datadog. Only utilized if MetricsSink is set to "dogstatsd". -datadog-hostname = "my_beacond_node" - ############################################################################### ### BeaconKit ### ############################################################################### diff --git a/testing/networks/80094/app.toml b/testing/networks/80094/app.toml index ed2e909bd9..55af21f89a 100644 --- a/testing/networks/80094/app.toml +++ b/testing/networks/80094/app.toml @@ -72,44 +72,12 @@ app-db-backend = "pebbledb" [telemetry] -# Prefixed with keys to separate services. -service-name = "beacond_node" - -# Enabled enables the application telemetry functionality. When enabled, -# an in-memory sink is also enabled by default. Operators may also enabled -# other sinks such as Prometheus. +# Enabled enables Prometheus metrics collection for all beacon-kit services. +# When true, metrics are exposed at the configured Prometheus endpoint. +# When false, all metrics are no-op with zero runtime overhead. +# Default: false enabled = true -# Enable prefixing gauge values with hostname. -enable-hostname = true - -# Enable adding hostname to labels. -enable-hostname-label = true - -# Enable adding service to labels. -enable-service-label = true - -# PrometheusRetentionTime, when positive, enables a Prometheus metrics sink. -prometheus-retention-time = 60 - -# GlobalLabels defines a global set of name/value label tuples applied to all -# metrics emitted using the wrapper functions defined in telemetry package. -# -# Example: -# [["chain_id", "cosmoshub-1"]] -global-labels = [] - -# MetricsSink defines the type of metrics sink to use. -metrics-sink = "" - -# StatsdAddr defines the address of a statsd server to send metrics to. -# Only utilized if MetricsSink is set to "statsd" or "dogstatsd". 
-statsd-addr = "" - -# DatadogHostname defines the hostname to use when emitting metrics to -# Datadog. Only utilized if MetricsSink is set to "dogstatsd". -datadog-hostname = "my_beacond_node" - ############################################################################### ### BeaconKit ### ############################################################################### diff --git a/testing/simulated/blob_fetcher_integration_test.go b/testing/simulated/blob_fetcher_integration_test.go index 1af4695645..68b0d90ac4 100644 --- a/testing/simulated/blob_fetcher_integration_test.go +++ b/testing/simulated/blob_fetcher_integration_test.go @@ -37,7 +37,7 @@ import ( "github.com/berachain/beacon-kit/da/blobreactor" dastore "github.com/berachain/beacon-kit/da/store" datypes "github.com/berachain/beacon-kit/da/types" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/crypto" "github.com/berachain/beacon-kit/primitives/eip4844" @@ -84,13 +84,13 @@ func (s *SimulatedSuite) TestBlobFetcher_MultiNodeFetch() { node1Store, log.NewNopLogger(), blobreactor.Config{RequestTimeout: 5 * time.Second}, - metrics.NewNoOpTelemetrySink(), + blobreactor.NewMetrics(discard.NewFactory()), ) node2Reactor := blobreactor.NewBlobReactor( node2Store, log.NewNopLogger(), blobreactor.Config{RequestTimeout: 5 * time.Second}, - metrics.NewNoOpTelemetrySink(), + blobreactor.NewMetrics(discard.NewFactory()), ) // Connect via P2P @@ -114,7 +114,7 @@ func (s *SimulatedSuite) TestBlobFetcher_MultiNodeFetch() { RetryInterval: 200 * time.Millisecond, MaxRetries: 3, }, - metrics.NewNoOpTelemetrySink(), + blockchain.NewBlobFetcherMetrics(discard.NewFactory()), ) s.Require().NoError(err) node1Fetcher.Start(s.CtxApp) @@ -166,7 +166,7 @@ func createTestSidecars(t *testing.T, s *SimulatedSuite, blobs []*eip4844.Blob, ) signedHeader := 
ctypes.NewSignedBeaconBlockHeader(block.GetHeader(), crypto.BLSSignature{}) - sidecarFactory := dablob.NewSidecarFactory(metrics.NewNoOpTelemetrySink()) + sidecarFactory := dablob.NewSidecarFactory(dablob.NewFactoryMetrics(discard.NewFactory())) sidecars := make(datypes.BlobSidecars, len(blobs)) for i := range blobs { inclusionProof, err := sidecarFactory.BuildKZGInclusionProof(block.Body, math.U64(i)) diff --git a/testing/simulated/components.go b/testing/simulated/components.go index 47f27dddf9..fe5f0a0b3f 100644 --- a/testing/simulated/components.go +++ b/testing/simulated/components.go @@ -57,13 +57,13 @@ func FixedComponents(t *testing.T) []any { components.ProvideStateProcessor, components.ProvideKVStore, components.ProvideStorageBackend, - components.ProvideTelemetrySink, - components.ProvideTelemetryService, components.ProvideTrustedSetup, components.ProvideValidatorService, components.ProvideNodeAPIServer, components.ProvideShutDownService, } + // Add all metrics providers (factory + individual metrics) + c = append(c, components.AllMetricsProviders()...) 
return c } diff --git a/testing/simulated/malicious_proposer_test.go b/testing/simulated/malicious_proposer_test.go index 3222807c9e..06f298a218 100644 --- a/testing/simulated/malicious_proposer_test.go +++ b/testing/simulated/malicious_proposer_test.go @@ -34,7 +34,7 @@ import ( datypes "github.com/berachain/beacon-kit/da/types" "github.com/berachain/beacon-kit/engine-primitives/errors" gethprimitives "github.com/berachain/beacon-kit/geth-primitives" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/common" "github.com/berachain/beacon-kit/primitives/eip4844" "github.com/berachain/beacon-kit/primitives/math" @@ -346,7 +346,7 @@ func (s *SimulatedSuite) TestProcessProposal_InvalidBlobCommitment_Errors() { sidecarsSlice := make([]*datypes.BlobSidecar, len(blobs)) // Build Inclusion Proofs for Sidecars - sidecarFactory := dablob.NewSidecarFactory(metrics.NewNoOpTelemetrySink()) + sidecarFactory := dablob.NewSidecarFactory(dablob.NewFactoryMetrics(discard.NewFactory())) for i := range blobs { inclusionProof, err := sidecarFactory.BuildKZGInclusionProof(proposedBlockMessage.GetBody(), math.U64(i)) s.Require().NoError(err) @@ -508,7 +508,7 @@ func (s *SimulatedSuite) TestProcessProposal_InvalidBlobInclusionProof_Errors() sidecarsSlice := make([]*datypes.BlobSidecar, len(blobs)) // Build Inclusion Proofs for Sidecars - sidecarFactory := dablob.NewSidecarFactory(metrics.NewNoOpTelemetrySink()) + sidecarFactory := dablob.NewSidecarFactory(dablob.NewFactoryMetrics(discard.NewFactory())) for i := range blobs { inclusionProof, err := sidecarFactory.BuildKZGInclusionProof(proposedBlockMessage.GetBody(), math.U64(i)) s.Require().NoError(err) diff --git a/testing/simulated/simcomet.go b/testing/simulated/simcomet.go index 292358f963..b3ea5c01e3 100644 --- a/testing/simulated/simcomet.go +++ b/testing/simulated/simcomet.go @@ -32,7 +32,6 @@ import ( 
cometbft "github.com/berachain/beacon-kit/consensus/cometbft/service" "github.com/berachain/beacon-kit/log/phuslu" "github.com/berachain/beacon-kit/node-core/builder" - "github.com/berachain/beacon-kit/node-core/components/metrics" "github.com/berachain/beacon-kit/node-core/types" cmtcfg "github.com/cometbft/cometbft/config" cmtcrypto "github.com/cometbft/cometbft/crypto" @@ -61,7 +60,7 @@ func ProvideSimComet( cs chain.Spec, cmtCfg *cmtcfg.Config, appOpts config.AppOptions, - telemetrySink *metrics.TelemetrySink) *SimComet { + metrics *cometbft.Metrics) *SimComet { return &SimComet{ Comet: cometbft.NewService( logger, @@ -71,7 +70,7 @@ func ProvideSimComet( blobReactor, cs, cmtCfg, - telemetrySink, + metrics, builder.DefaultServiceOptions(appOpts)..., ), cmtCfg: cmtCfg, diff --git a/testing/simulated/valid_chain_test.go b/testing/simulated/valid_chain_test.go index 9ff08a1b7d..65a186f424 100644 --- a/testing/simulated/valid_chain_test.go +++ b/testing/simulated/valid_chain_test.go @@ -32,7 +32,7 @@ import ( dablob "github.com/berachain/beacon-kit/da/blob" datypes "github.com/berachain/beacon-kit/da/types" gethprimitives "github.com/berachain/beacon-kit/geth-primitives" - "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/eip4844" "github.com/berachain/beacon-kit/primitives/math" "github.com/berachain/beacon-kit/testing/simulated" @@ -331,7 +331,7 @@ func (s *SimulatedSuite) TestFullLifecycle_ValidBlockAndInjectedBlob_IsSuccessfu sidecarsSlice := make([]*datypes.BlobSidecar, len(blobs)) // Build Inclusion Proofs for Sidecars - sidecarFactory := dablob.NewSidecarFactory(metrics.NewNoOpTelemetrySink()) + sidecarFactory := dablob.NewSidecarFactory(dablob.NewFactoryMetrics(discard.NewFactory())) for i := range blobs { inclusionProof, err := sidecarFactory.BuildKZGInclusionProof(proposedBlockMessage.GetBody(), math.U64(i)) s.Require().NoError(err) 
diff --git a/testing/state-transition/state-transition.go b/testing/state-transition/state-transition.go index dcde53b511..4de52e5c49 100644 --- a/testing/state-transition/state-transition.go +++ b/testing/state-transition/state-transition.go @@ -29,12 +29,11 @@ import ( corestore "cosmossdk.io/core/store" "cosmossdk.io/log" "cosmossdk.io/store" - "cosmossdk.io/store/metrics" storetypes "cosmossdk.io/store/types" "github.com/berachain/beacon-kit/chain" "github.com/berachain/beacon-kit/consensus-types/types" "github.com/berachain/beacon-kit/log/noop" - nodemetrics "github.com/berachain/beacon-kit/node-core/components/metrics" + "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/primitives/bytes" cryptomocks "github.com/berachain/beacon-kit/primitives/crypto/mocks" "github.com/berachain/beacon-kit/primitives/transition" @@ -88,15 +87,11 @@ func BuildTestStores() ( return nil, nil, nil, fmt.Errorf("failed opening mem deposits db: %w", err) } - var ( - nopLog = log.NewNopLogger() - nopMetrics = metrics.NewNoOpMetrics() - ) - + nopLog := log.NewNopLogger() cms := store.NewCommitMultiStore( appDB, nopLog, - nopMetrics, + storage.NoOpStoreMetrics{}, ) cms.MountStoreWithDB(testStoreKey, storetypes.StoreTypeIAVL, nil) @@ -134,7 +129,7 @@ func SetupTestState(t *testing.T, cs chain.Spec) ( sdkCtx := sdk.NewContext(cms.CacheMultiStore(), true, log.NewNopLogger()) beaconState := statedb.NewBeaconStateFromDB( - kvStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), nodemetrics.NewNoOpTelemetrySink(), + kvStore.WithContext(sdkCtx), cs, sdkCtx.Logger(), statedb.NewMetrics(discard.NewFactory()), ) sp := core.NewStateProcessor( @@ -146,7 +141,7 @@ func SetupTestState(t *testing.T, cs chain.Spec) ( func(bytes.B48) ([]byte, error) { return DummyProposerAddr, nil }, - nodemetrics.NewNoOpTelemetrySink(), + core.NewMetrics(discard.NewFactory()), ) // by default we keep checks at minimum. 
It is up From 376a512ff624fea917f3eb8f5ec3688d59d99e40 Mon Sep 17 00:00:00 2001 From: Fridrik Asmundsson Date: Fri, 24 Oct 2025 14:58:45 +0000 Subject: [PATCH 2/5] Add Summary metrics and configurable service-name for backward compatibility --- beacon/blockchain/blob_fetcher_metrics.go | 25 ++-- beacon/blockchain/metrics.go | 57 ++++----- beacon/validator/metrics.go | 34 +++--- config/config/config.go | 6 +- config/config/config.toml.tpl | 5 +- consensus/cometbft/service/metrics.go | 53 ++++---- da/blob/factory_metrics.go | 53 ++++---- da/blob/processor_metrics.go | 27 ++-- da/blob/verifier_metrics.go | 40 +++--- da/blobreactor/metrics.go | 44 +++---- execution/client/metrics.go | 115 ++++++++---------- execution/engine/metrics.go | 75 +++++------- node-core/components/metrics_factory.go | 2 +- observability/metrics/discard/discard.go | 38 +++--- observability/metrics/lv/labelvalues.go | 20 --- observability/metrics/metrics.go | 57 +++++++-- .../metrics/prometheus/prometheus.go | 85 +++++++------ state-transition/core/metrics.go | 40 +++--- state-transition/core/state/metrics.go | 10 +- testing/networks/80069/app.toml | 3 + testing/networks/80094/app.toml | 3 + 21 files changed, 370 insertions(+), 422 deletions(-) diff --git a/beacon/blockchain/blob_fetcher_metrics.go b/beacon/blockchain/blob_fetcher_metrics.go index 4c49a30af8..0b20fd3bd4 100644 --- a/beacon/blockchain/blob_fetcher_metrics.go +++ b/beacon/blockchain/blob_fetcher_metrics.go @@ -45,41 +45,36 @@ func NewBlobFetcherMetrics(factory metrics.Factory) *BlobFetcherMetrics { return &BlobFetcherMetrics{ RetriesTotal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blob_fetcher", - Name: "retries_total", - Help: "Number of times a blob request was retried after failure", + Name: "beacon_kit_blob_fetcher_retries_total", + Help: "Number of times a blob request was retried after failure", }, nil, ), RequestsExpiredTotal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blob_fetcher", - Name: 
"requests_expired_total", - Help: "Number of blob fetch requests that expired before completion", + Name: "beacon_kit_blob_fetcher_requests_expired_total", + Help: "Number of blob fetch requests that expired before completion", }, []string{"reason"}, ), RequestsCompletedTotal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blob_fetcher", - Name: "requests_completed_total", - Help: "Number of blob fetch requests that completed successfully", + Name: "beacon_kit_blob_fetcher_requests_completed_total", + Help: "Number of blob fetch requests that completed successfully", }, nil, ), RequestsQueuedTotal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blob_fetcher", - Name: "requests_queued_total", - Help: "Number of new blob fetch requests added to the queue", + Name: "beacon_kit_blob_fetcher_requests_queued_total", + Help: "Number of new blob fetch requests added to the queue", }, nil, ), QueueDepth: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "blob_fetcher", - Name: "queue_depth", - Help: "Current depth of the blob fetcher queue", + Name: "beacon_kit_blob_fetcher_queue_depth", + Help: "Current depth of the blob fetcher queue", }, nil, ), diff --git a/beacon/blockchain/metrics.go b/beacon/blockchain/metrics.go index 526754053b..9752bf76bf 100644 --- a/beacon/blockchain/metrics.go +++ b/beacon/blockchain/metrics.go @@ -25,17 +25,16 @@ import ( "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" - "github.com/prometheus/client_golang/prometheus" ) // Metrics is a struct that contains metrics for the blockchain service. 
type Metrics struct { - StateTransitionDuration metrics.Histogram + StateTransitionDuration metrics.Summary RebuildPayloadForRejectedBlockSuccess metrics.Counter RebuildPayloadForRejectedBlockFailure metrics.Counter OptimisticPayloadBuildSuccess metrics.Counter OptimisticPayloadBuildFailure metrics.Counter - StateRootVerificationDuration metrics.Histogram + StateRootVerificationDuration metrics.Summary FailedToGetBlockLogs metrics.Counter FailedToEnqueueDeposits metrics.Counter } @@ -44,69 +43,61 @@ type Metrics struct { // Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. func NewMetrics(factory metrics.Factory) *Metrics { return &Metrics{ - StateTransitionDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "beacon_blockchain", - Name: "state_transition_duration", - Help: "Time taken to process state transition in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + StateTransitionDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_beacon_blockchain_state_transition_duration", + Help: "Time taken to process state transition in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), RebuildPayloadForRejectedBlockSuccess: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blockchain", - Name: "rebuild_payload_for_rejected_block_success", - Help: "Number of successful payload rebuilds for rejected blocks", + Name: "beacon_kit_blockchain_rebuild_payload_for_rejected_block_success", + Help: "Number of successful payload rebuilds for rejected blocks", }, []string{"slot"}, ), RebuildPayloadForRejectedBlockFailure: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blockchain", - Name: "rebuild_payload_for_rejected_block_failure", - Help: "Number of failed payload rebuilds for rejected blocks", + Name: "beacon_kit_blockchain_rebuild_payload_for_rejected_block_failure", + Help: "Number of failed payload rebuilds for rejected blocks", }, []string{"slot", 
"error"}, ), OptimisticPayloadBuildSuccess: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blockchain", - Name: "optimistic_payload_build_success", - Help: "Number of successful optimistic payload builds", + Name: "beacon_kit_blockchain_optimistic_payload_build_success", + Help: "Number of successful optimistic payload builds", }, []string{"slot"}, ), OptimisticPayloadBuildFailure: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blockchain", - Name: "optimistic_payload_build_failure", - Help: "Number of failed optimistic payload builds", + Name: "beacon_kit_blockchain_optimistic_payload_build_failure", + Help: "Number of failed optimistic payload builds", }, []string{"slot", "error"}, ), - StateRootVerificationDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "blockchain", - Name: "state_root_verification_duration", - Help: "Time taken to verify state root in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + StateRootVerificationDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_blockchain_state_root_verification_duration", + Help: "Time taken to verify state root in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), FailedToGetBlockLogs: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_deposit", - Name: "failed_to_get_block_logs", - Help: "Number of times failed to read deposits from execution layer block logs", + Name: "beacon_kit_execution_deposit_failed_to_get_block_logs", + Help: "Number of times failed to read deposits from execution layer block logs", }, []string{"block_num"}, ), FailedToEnqueueDeposits: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_deposit", - Name: "failed_to_enqueue_deposits", - Help: "Number of times failed to enqueue deposits to storage", + Name: "beacon_kit_execution_deposit_failed_to_enqueue_deposits", + Help: "Number of times failed to enqueue deposits to storage", }, []string{"block_num"}, ), diff --git 
a/beacon/validator/metrics.go b/beacon/validator/metrics.go index 1dc606a831..40ceadb77b 100644 --- a/beacon/validator/metrics.go +++ b/beacon/validator/metrics.go @@ -25,16 +25,17 @@ import ( "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" - "github.com/prometheus/client_golang/prometheus" ) // Metrics is a struct that contains metrics for the validator service. type Metrics struct { // RequestBlockForProposalDuration tracks time to request block for proposal - RequestBlockForProposalDuration metrics.Histogram + // Using Summary for backward compatibility with cosmos-sdk/telemetry. + RequestBlockForProposalDuration metrics.Summary // StateRootComputationDuration tracks time to compute state root - StateRootComputationDuration metrics.Histogram + // Using Summary for backward compatibility with cosmos-sdk/telemetry. + StateRootComputationDuration metrics.Summary // FailedToRetrievePayload tracks failed payload retrievals FailedToRetrievePayload metrics.Counter @@ -44,29 +45,26 @@ type Metrics struct { // Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
func NewMetrics(factory metrics.Factory) *Metrics { return &Metrics{ - RequestBlockForProposalDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "validator", - Name: "request_block_for_proposal_duration", - Help: "Time taken to request block for proposal in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + RequestBlockForProposalDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_validator_request_block_for_proposal_duration", + Help: "Time taken to request block for proposal in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), - StateRootComputationDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "validator", - Name: "state_root_computation_duration", - Help: "Time taken to compute state root in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + StateRootComputationDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_validator_state_root_computation_duration", + Help: "Time taken to compute state root in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), FailedToRetrievePayload: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "validator", - Name: "failed_to_retrieve_payload", - Help: "Number of times validator failed to retrieve payload", + Name: "beacon_kit_validator_failed_to_retrieve_payload", + Help: "Number of times validator failed to retrieve payload", }, []string{"slot", "error"}, ), diff --git a/config/config/config.go b/config/config/config.go index 311eabdbbc..792d1652c1 100644 --- a/config/config/config.go +++ b/config/config/config.go @@ -82,6 +82,9 @@ type TelemetryConfig struct { // When true, all beacon-kit services emit Prometheus metrics. // When false, all metrics are no-op with zero runtime overhead. Enabled bool `mapstructure:"enabled"` + + // ServiceName defines the namespace for all Prometheus metrics. 
+ ServiceName string `mapstructure:"service-name"` } // Config defines the server's top level configuration. @@ -106,7 +109,8 @@ func DefaultConfig() *Config { IAVLDisableFastNode: false, }, Telemetry: TelemetryConfig{ - Enabled: false, + Enabled: false, + ServiceName: "beacond_node", }, } } diff --git a/config/config/config.toml.tpl b/config/config/config.toml.tpl index dbe5793cc6..77b3b16a1d 100644 --- a/config/config/config.toml.tpl +++ b/config/config/config.toml.tpl @@ -50,7 +50,7 @@ inter-block-cache = {{ .BaseConfig.InterBlockCache }} # IavlCacheSize set the size of the iavl tree cache (in number of nodes). iavl-cache-size = {{ .BaseConfig.IAVLCacheSize }} -# IAVLDisableFastNode enables or disables the fast node feature of IAVL. +# IAVLDisableFastNode enables or disables the fast node feature of IAVL. # Default is false. iavl-disable-fastnode = {{ .BaseConfig.IAVLDisableFastNode }} @@ -66,3 +66,6 @@ iavl-disable-fastnode = {{ .BaseConfig.IAVLDisableFastNode }} # When false, all metrics are no-op with zero runtime overhead. # Default: false enabled = {{ .Telemetry.Enabled }} + +# ServiceName defines the namespace prefix for all Prometheus metrics. +service-name = "{{ .Telemetry.ServiceName }}" diff --git a/consensus/cometbft/service/metrics.go b/consensus/cometbft/service/metrics.go index 9f9c5467cd..067a42df15 100644 --- a/consensus/cometbft/service/metrics.go +++ b/consensus/cometbft/service/metrics.go @@ -24,64 +24,55 @@ import ( "time" "github.com/berachain/beacon-kit/observability/metrics" - prominternal "github.com/prometheus/client_golang/prometheus" ) // Metrics holds metrics for the CometBFT service. -// -// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. type Metrics struct { // QueryCount tracks the number of ABCI queries received, labeled by query path. QueryCount metrics.Counter // QueryDuration tracks the time taken to process ABCI queries, labeled by query path. 
- QueryDuration metrics.Histogram + QueryDuration metrics.Summary // PrepareProposalDuration tracks the time taken to prepare a proposal. - PrepareProposalDuration metrics.Histogram + PrepareProposalDuration metrics.Summary // ProcessProposalDuration tracks the time taken to process a proposal. - ProcessProposalDuration metrics.Histogram + ProcessProposalDuration metrics.Summary } -// NewMetrics creates a new Metrics instance using the provided factory. -// The factory determines whether real Prometheus metrics or no-op metrics are created. -// -//nolint:mnd // magic numbers are histogram bucket ranges for timing metrics +// NewMetrics creates a new Metrics instance using the provided factory. The factory determines +// whether real Prometheus metrics or no-op metrics are created. func NewMetrics(factory metrics.Factory) *Metrics { return &Metrics{ QueryCount: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "comet", - Name: "query_count", - Help: "Total number of ABCI queries received", + Name: "beacon_kit_comet_query_count", + Help: "Total number of ABCI queries received", }, []string{"path"}, ), - QueryDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "comet", - Name: "query_duration", - Help: "Time taken to process ABCI queries in seconds", - Buckets: prominternal.ExponentialBucketsRange(0.001, 10, 10), + QueryDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_comet_query_duration", + Help: "Time taken to process ABCI queries in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"path"}, ), - PrepareProposalDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "runtime", - Name: "prepare_proposal_duration", - Help: "Time taken to prepare a proposal in seconds", - Buckets: prominternal.ExponentialBucketsRange(0.001, 10, 10), + PrepareProposalDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_runtime_prepare_proposal_duration", + Help: "Time taken to prepare a 
proposal in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), - ProcessProposalDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "runtime", - Name: "process_proposal_duration", - Help: "Time taken to process a proposal in seconds", - Buckets: prominternal.ExponentialBucketsRange(0.001, 10, 10), + ProcessProposalDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_runtime_process_proposal_duration", + Help: "Time taken to process a proposal in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), diff --git a/da/blob/factory_metrics.go b/da/blob/factory_metrics.go index e3cb382e48..59382e9be0 100644 --- a/da/blob/factory_metrics.go +++ b/da/blob/factory_metrics.go @@ -25,54 +25,49 @@ import ( "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" - "github.com/prometheus/client_golang/prometheus" ) // FactoryMetrics is a struct that contains metrics for the sidecar factory. type FactoryMetrics struct { - BuildSidecarDuration metrics.Histogram - BuildKZGInclusionProofDuration metrics.Histogram - BuildBlockBodyProofDuration metrics.Histogram - BuildCommitmentProofDuration metrics.Histogram + BuildSidecarDuration metrics.Summary + BuildKZGInclusionProofDuration metrics.Summary + BuildBlockBodyProofDuration metrics.Summary + BuildCommitmentProofDuration metrics.Summary } // NewFactoryMetrics returns a new FactoryMetrics instance. // Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
func NewFactoryMetrics(factory metrics.Factory) *FactoryMetrics { return &FactoryMetrics{ - BuildSidecarDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_factory", - Name: "build_sidecar_duration", - Help: "Time taken to build blob sidecars in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + BuildSidecarDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_factory_build_sidecar_duration", + Help: "Time taken to build blob sidecars in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, ), - BuildKZGInclusionProofDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_factory", - Name: "build_kzg_inclusion_proof_duration", - Help: "Time taken to build KZG inclusion proof in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + BuildKZGInclusionProofDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_factory_build_kzg_inclusion_proof_duration", + Help: "Time taken to build KZG inclusion proof in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), - BuildBlockBodyProofDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_factory", - Name: "build_block_body_proof_duration", - Help: "Time taken to build block body proof in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + BuildBlockBodyProofDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_factory_build_block_body_proof_duration", + Help: "Time taken to build block body proof in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), - BuildCommitmentProofDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_factory", - Name: "build_commitment_proof_duration", - Help: "Time taken to build commitment proof in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + BuildCommitmentProofDuration: 
factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_factory_build_commitment_proof_duration", + Help: "Time taken to build commitment proof in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), diff --git a/da/blob/processor_metrics.go b/da/blob/processor_metrics.go index 306b973812..64ce6836e0 100644 --- a/da/blob/processor_metrics.go +++ b/da/blob/processor_metrics.go @@ -25,34 +25,31 @@ import ( "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" - "github.com/prometheus/client_golang/prometheus" ) // ProcessorMetrics is a struct that contains metrics for the blob processor. type ProcessorMetrics struct { - VerifyBlobsDuration metrics.Histogram - ProcessBlobDuration metrics.Histogram + VerifyBlobsDuration metrics.Summary + ProcessBlobDuration metrics.Summary } // NewProcessorMetrics returns a new ProcessorMetrics instance. // Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. 
func NewProcessorMetrics(factory metrics.Factory) *ProcessorMetrics { return &ProcessorMetrics{ - VerifyBlobsDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_processor", - Name: "verify_blobs_duration", - Help: "Time taken to verify blob sidecars in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + VerifyBlobsDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_processor_verify_blobs_duration", + Help: "Time taken to verify blob sidecars in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, ), - ProcessBlobDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_processor", - Name: "process_blob_duration", - Help: "Time taken to process blob sidecars in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + ProcessBlobDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_processor_process_blob_duration", + Help: "Time taken to process blob sidecars in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, ), diff --git a/da/blob/verifier_metrics.go b/da/blob/verifier_metrics.go index 1e09453ffd..89b570cbce 100644 --- a/da/blob/verifier_metrics.go +++ b/da/blob/verifier_metrics.go @@ -25,44 +25,40 @@ import ( "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/primitives/math" - "github.com/prometheus/client_golang/prometheus" ) // VerifierMetrics is a struct that contains metrics for the blob verifier. type VerifierMetrics struct { - VerifyBlobsDuration metrics.Histogram - VerifyInclusionProofsDuration metrics.Histogram - VerifyKZGProofsDuration metrics.Histogram + VerifyBlobsDuration metrics.Summary + VerifyInclusionProofsDuration metrics.Summary + VerifyKZGProofsDuration metrics.Summary } // NewVerifierMetrics returns a new VerifierMetrics instance. 
// Metric names are kept identical to cosmos-sdk/telemetry output for Grafana compatibility. func NewVerifierMetrics(factory metrics.Factory) *VerifierMetrics { return &VerifierMetrics{ - VerifyBlobsDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_verifier", - Name: "verify_blobs_duration", - Help: "Time taken to verify blobs in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + VerifyBlobsDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_verifier_verify_blobs_duration", + Help: "Time taken to verify blobs in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars", "kzg_implementation"}, ), - VerifyInclusionProofsDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_verifier", - Name: "verify_inclusion_proofs_duration", - Help: "Time taken to verify inclusion proofs in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + VerifyInclusionProofsDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_verifier_verify_inclusion_proofs_duration", + Help: "Time taken to verify inclusion proofs in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, ), - VerifyKZGProofsDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "da_blob_verifier", - Name: "verify_kzg_proofs_duration", - Help: "Time taken to verify KZG proofs in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + VerifyKZGProofsDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_da_blob_verifier_verify_kzg_proofs_duration", + Help: "Time taken to verify KZG proofs in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars", "kzg_implementation"}, ), diff --git a/da/blobreactor/metrics.go b/da/blobreactor/metrics.go index 19874c6bd6..ff65d067ad 100644 --- a/da/blobreactor/metrics.go +++ b/da/blobreactor/metrics.go @@ -24,7 +24,6 @@ 
import ( "time" "github.com/berachain/beacon-kit/observability/metrics" - "github.com/prometheus/client_golang/prometheus" ) // Metric status constants for blob reactor requests. @@ -44,7 +43,7 @@ const ( // Metrics contains metrics for the blob reactor P2P operations. type Metrics struct { RequestTotal metrics.Counter - RequestDuration metrics.Histogram + RequestDuration metrics.Summary PeerAttemptsTotal metrics.Counter WorkerPoolFullTotal metrics.Counter ActiveRequests metrics.Gauge @@ -58,58 +57,51 @@ func NewMetrics(factory metrics.Factory) *Metrics { return &Metrics{ RequestTotal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blobreactor", - Name: "request_total", - Help: "Total number of blob requests completed", + Name: "beacon_kit_blobreactor_request_total", + Help: "Total number of blob requests completed", }, []string{"status"}, ), - RequestDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "blobreactor", - Name: "request_duration", - Help: "Time taken to complete blob requests in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + RequestDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_blobreactor_request_duration", + Help: "Time taken to complete blob requests in seconds", + Objectives: metrics.QuantilesP50P90P99, }, []string{"status"}, ), PeerAttemptsTotal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blobreactor", - Name: "peer_attempts_total", - Help: "Total number of peer attempts for blob requests", + Name: "beacon_kit_blobreactor_peer_attempts_total", + Help: "Total number of peer attempts for blob requests", }, []string{"status"}, ), WorkerPoolFullTotal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "blobreactor", - Name: "worker_pool_full_total", - Help: "Number of times worker pool was full and messages were dropped", + Name: "beacon_kit_blobreactor_worker_pool_full_total", + Help: "Number of times worker pool was full and messages were dropped", }, 
[]string{"message_type"}, ), ActiveRequests: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "blobreactor", - Name: "active_requests", - Help: "Number of currently active blob requests", + Name: "beacon_kit_blobreactor_active_requests", + Help: "Number of currently active blob requests", }, nil, ), PeersAvailable: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "blobreactor", - Name: "peers_available", - Help: "Number of available peers for blob requests", + Name: "beacon_kit_blobreactor_peers_available", + Help: "Number of available peers for blob requests", }, nil, ), PeersTotal: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "blobreactor", - Name: "peers_total", - Help: "Total number of connected peers", + Name: "beacon_kit_blobreactor_peers_total", + Help: "Total number of connected peers", }, nil, ), diff --git a/execution/client/metrics.go b/execution/client/metrics.go index dfb52dcdd4..7f3e18a940 100644 --- a/execution/client/metrics.go +++ b/execution/client/metrics.go @@ -25,15 +25,14 @@ import ( "github.com/berachain/beacon-kit/log" "github.com/berachain/beacon-kit/observability/metrics" - "github.com/prometheus/client_golang/prometheus" ) // Metrics is a struct that contains metrics for the execution client. 
type Metrics struct { // Duration histograms - ForkchoiceUpdateDuration metrics.Histogram - NewPayloadDuration metrics.Histogram - GetPayloadDuration metrics.Histogram + ForkchoiceUpdateDuration metrics.Summary + NewPayloadDuration metrics.Summary + GetPayloadDuration metrics.Summary // Timeout counters EngineAPITimeout metrics.Counter @@ -64,30 +63,27 @@ type Metrics struct { func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { return &Metrics{ // Duration histograms - ForkchoiceUpdateDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "execution_client", - Name: "forkchoice_update_duration", - Help: "Time taken for forkchoice update in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + ForkchoiceUpdateDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_execution_client_forkchoice_update_duration", + Help: "Time taken for forkchoice update in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), - NewPayloadDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "execution_client", - Name: "new_payload_duration", - Help: "Time taken for new payload in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + NewPayloadDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_execution_client_new_payload_duration", + Help: "Time taken for new payload in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), - GetPayloadDuration: factory.NewHistogram( - metrics.HistogramOpts{ - Subsystem: "execution_client", - Name: "get_payload_duration", - Help: "Time taken for get payload in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), + GetPayloadDuration: factory.NewSummary( + metrics.SummaryOpts{ + Name: "beacon_kit_execution_client_get_payload_duration", + Help: "Time taken for get payload in seconds", + Objectives: metrics.QuantilesP50P90P99, }, nil, ), @@ -95,41 +91,36 @@ func NewMetrics(factory 
metrics.Factory, logger log.Logger) *Metrics { // Timeout counters EngineAPITimeout: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "engine_api_timeout", - Help: "Number of engine API timeouts", + Name: "beacon_kit_execution_client_engine_api_timeout", + Help: "Number of engine API timeouts", }, nil, ), ForkchoiceUpdateDurationTimeout: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "forkchoice_update_duration_timeout", - Help: "Number of forkchoice update timeouts", + Name: "beacon_kit_execution_client_forkchoice_update_duration_timeout", + Help: "Number of forkchoice update timeouts", }, nil, ), NewPayloadDurationTimeout: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "new_payload_duration_timeout", - Help: "Number of new payload timeouts", + Name: "beacon_kit_execution_client_new_payload_duration_timeout", + Help: "Number of new payload timeouts", }, nil, ), GetPayloadDurationTimeout: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "get_payload_duration_timeout", - Help: "Number of get payload timeouts", + Name: "beacon_kit_execution_client_get_payload_duration_timeout", + Help: "Number of get payload timeouts", }, nil, ), HTTPTimeout: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "http_timeout", - Help: "Number of HTTP timeouts", + Name: "beacon_kit_execution_client_http_timeout", + Help: "Number of HTTP timeouts", }, nil, ), @@ -137,81 +128,71 @@ func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { // Error counters ParseError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "parse_error", - Help: "Number of parse errors", + Name: "beacon_kit_execution_client_parse_error", + Help: "Number of parse errors", }, nil, ), InvalidRequest: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "invalid_request", - 
Help: "Number of invalid requests", + Name: "beacon_kit_execution_client_invalid_request", + Help: "Number of invalid requests", }, nil, ), MethodNotFound: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "method_not_found", - Help: "Number of method not found errors", + Name: "beacon_kit_execution_client_method_not_found", + Help: "Number of method not found errors", }, nil, ), InvalidParams: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "invalid_params", - Help: "Number of invalid params errors", + Name: "beacon_kit_execution_client_invalid_params", + Help: "Number of invalid params errors", }, nil, ), InternalError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "internal_error", - Help: "Number of internal errors", + Name: "beacon_kit_execution_client_internal_error", + Help: "Number of internal errors", }, nil, ), UnknownPayloadError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "unknown_payload_error", - Help: "Number of unknown payload errors", + Name: "beacon_kit_execution_client_unknown_payload_error", + Help: "Number of unknown payload errors", }, nil, ), InvalidForkchoiceState: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "invalid_forkchoice_state", - Help: "Number of invalid forkchoice state errors", + Name: "beacon_kit_execution_client_invalid_forkchoice_state", + Help: "Number of invalid forkchoice state errors", }, nil, ), InvalidPayloadAttributes: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "invalid_payload_attributes", - Help: "Number of invalid payload attributes errors", + Name: "beacon_kit_execution_client_invalid_payload_attributes", + Help: "Number of invalid payload attributes errors", }, nil, ), RequestTooLarge: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "request_too_large", - Help: "Number 
of request too large errors", + Name: "beacon_kit_execution_client_request_too_large", + Help: "Number of request too large errors", }, nil, ), InternalServerError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_client", - Name: "internal_server_error", - Help: "Number of internal server errors", + Name: "beacon_kit_execution_client_internal_server_error", + Help: "Number of internal server errors", }, nil, ), diff --git a/execution/engine/metrics.go b/execution/engine/metrics.go index 3753958432..060096c120 100644 --- a/execution/engine/metrics.go +++ b/execution/engine/metrics.go @@ -65,65 +65,57 @@ func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { // New payload metrics NewPayload: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload", - Help: "Number of new payload calls", + Name: "beacon_kit_execution_engine_new_payload", + Help: "Number of new payload calls", }, []string{"payload_block_hash", "payload_parent_block_hash"}, ), NewPayloadValid: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload_valid", - Help: "Number of valid new payload responses", + Name: "beacon_kit_execution_engine_new_payload_valid", + Help: "Number of valid new payload responses", }, nil, ), NewPayloadAcceptedPayloadStatus: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload_accepted_payload_status", - Help: "Number of accepted payload status responses", + Name: "beacon_kit_execution_engine_new_payload_accepted_payload_status", + Help: "Number of accepted payload status responses", }, nil, ), NewPayloadSyncingPayloadStatus: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload_syncing_payload_status", - Help: "Number of syncing payload status responses", + Name: "beacon_kit_execution_engine_new_payload_syncing_payload_status", + Help: "Number of syncing payload status responses", }, 
nil, ), NewPayloadInvalidPayloadStatus: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload_invalid_payload_status", - Help: "Number of invalid payload status responses", + Name: "beacon_kit_execution_engine_new_payload_invalid_payload_status", + Help: "Number of invalid payload status responses", }, nil, ), NewPayloadNonFatalError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload_non_fatal_error", - Help: "Number of non-fatal errors during new payload", + Name: "beacon_kit_execution_engine_new_payload_non_fatal_error", + Help: "Number of non-fatal errors during new payload", }, []string{"error"}, ), NewPayloadFatalError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload_fatal_error", - Help: "Number of fatal errors during new payload", + Name: "beacon_kit_execution_engine_new_payload_fatal_error", + Help: "Number of fatal errors during new payload", }, []string{"error"}, ), NewPayloadUndefinedError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "new_payload_undefined_error", - Help: "Number of undefined errors during new payload", + Name: "beacon_kit_execution_engine_new_payload_undefined_error", + Help: "Number of undefined errors during new payload", }, []string{"error"}, ), @@ -131,57 +123,50 @@ func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { // Forkchoice update metrics ForkchoiceUpdate: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "forkchoice_update", - Help: "Number of forkchoice update calls", + Name: "beacon_kit_execution_engine_forkchoice_update", + Help: "Number of forkchoice update calls", }, []string{"has_payload_attributes"}, ), ForkchoiceUpdateValid: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "forkchoice_update_valid", - Help: "Number of valid forkchoice update responses", + Name: 
"beacon_kit_execution_engine_forkchoice_update_valid", + Help: "Number of valid forkchoice update responses", }, nil, ), ForkchoiceUpdateSyncing: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "forkchoice_update_syncing", - Help: "Number of syncing forkchoice update responses", + Name: "beacon_kit_execution_engine_forkchoice_update_syncing", + Help: "Number of syncing forkchoice update responses", }, []string{"error"}, ), ForkchoiceUpdateInvalid: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "forkchoice_update_invalid", - Help: "Number of invalid forkchoice update responses", + Name: "beacon_kit_execution_engine_forkchoice_update_invalid", + Help: "Number of invalid forkchoice update responses", }, []string{"error"}, ), ForkchoiceUpdateFatalError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "forkchoice_update_fatal_error", - Help: "Number of fatal errors during forkchoice update", + Name: "beacon_kit_execution_engine_forkchoice_update_fatal_error", + Help: "Number of fatal errors during forkchoice update", }, []string{"error"}, ), ForkchoiceUpdateNonFatalError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "forkchoice_update_non_fatal_error", - Help: "Number of non-fatal errors during forkchoice update", + Name: "beacon_kit_execution_engine_forkchoice_update_non_fatal_error", + Help: "Number of non-fatal errors during forkchoice update", }, []string{"error"}, ), ForkchoiceUpdateUndefinedError: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "execution_engine", - Name: "forkchoice_update_undefined_error", - Help: "Number of undefined errors during forkchoice update", + Name: "beacon_kit_execution_engine_forkchoice_update_undefined_error", + Help: "Number of undefined errors during forkchoice update", }, []string{"error"}, ), diff --git a/node-core/components/metrics_factory.go b/node-core/components/metrics_factory.go 
index c33fc2be2a..0dbea28af2 100644 --- a/node-core/components/metrics_factory.go +++ b/node-core/components/metrics_factory.go @@ -39,7 +39,7 @@ type MetricsFactoryInput struct { // This setting affects ALL metrics in the beacon-kit system. func ProvideMetricsFactory(in MetricsFactoryInput) metrics.Factory { if in.Config.Telemetry.Enabled { - return prometheus.NewFactory("beacon_kit") + return prometheus.NewFactory(in.Config.Telemetry.ServiceName) } return discard.NewFactory() } diff --git a/observability/metrics/discard/discard.go b/observability/metrics/discard/discard.go index 1ee15b89ab..7d0dc67520 100644 --- a/observability/metrics/discard/discard.go +++ b/observability/metrics/discard/discard.go @@ -26,17 +26,6 @@ import "github.com/berachain/beacon-kit/observability/metrics" type Factory struct{} // NewFactory creates a new no-op metrics factory. -// All metrics created by this factory have zero runtime overhead. -// -// Example: -// -// factory := discard.NewFactory() -// counter := factory.NewCounter(metrics.CounterOpts{ -// Subsystem: "blockchain", -// Name: "blocks_total", -// Help: "Total number of blocks processed", -// }, []string{"status"}) -// counter.Add(1) // This is a no-op, compiled away by the Go compiler func NewFactory() metrics.Factory { return Factory{} } @@ -56,9 +45,12 @@ func (Factory) NewHistogram(metrics.HistogramOpts, []string) metrics.Histogram { return NewHistogram() } +// NewSummary returns a no-op Summary. +func (Factory) NewSummary(metrics.SummaryOpts, []string) metrics.Summary { + return NewSummary() +} + // noOpCounter is a no-op implementation of metrics.Counter. -// All operations are compiled away by the Go compiler, providing -// zero runtime overhead when telemetry is disabled. type noOpCounter struct{} // NewCounter returns a no-op Counter. @@ -75,8 +67,6 @@ func (noOpCounter) With(...string) metrics.Counter { func (noOpCounter) Add(float64) {} // noOpGauge is a no-op implementation of metrics.Gauge. 
-// All operations are compiled away by the Go compiler, providing -// zero runtime overhead when telemetry is disabled. type noOpGauge struct{} // NewGauge returns a no-op Gauge. @@ -96,8 +86,6 @@ func (noOpGauge) Set(float64) {} func (noOpGauge) Add(float64) {} // noOpHistogram is a no-op implementation of metrics.Histogram. -// All operations are compiled away by the Go compiler, providing -// zero runtime overhead when telemetry is disabled. type noOpHistogram struct{} // NewHistogram returns a no-op Histogram. @@ -112,3 +100,19 @@ func (noOpHistogram) With(...string) metrics.Histogram { // Observe does nothing. func (noOpHistogram) Observe(float64) {} + +// noOpSummary is a no-op implementation of metrics.Summary. +type noOpSummary struct{} + +// NewSummary returns a no-op Summary. +func NewSummary() metrics.Summary { + return noOpSummary{} +} + +// With returns the same no-op Summary. +func (noOpSummary) With(...string) metrics.Summary { + return noOpSummary{} +} + +// Observe does nothing. +func (noOpSummary) Observe(float64) {} diff --git a/observability/metrics/lv/labelvalues.go b/observability/metrics/lv/labelvalues.go index 2586bdbf53..d70662c228 100644 --- a/observability/metrics/lv/labelvalues.go +++ b/observability/metrics/lv/labelvalues.go @@ -23,30 +23,10 @@ package lv // LabelValues is a type alias for a slice of strings that represent // metric label key-value pairs. It provides efficient label accumulation // using a copy-on-write pattern. -// -// Label values should be provided as alternating key-value pairs: -// ["key1", "value1", "key2", "value2", ...] -// -// Example: -// -// var lvs lv.LabelValues -// lvs = lvs.With("method", "GET") -// lvs = lvs.With("status", "200") -// // lvs now contains: ["method", "GET", "status", "200"] type LabelValues []string // With returns a new LabelValues with the given label key-value pairs appended. // The original LabelValues is not modified (copy-on-write semantics). 
-// -// If the number of labelValues is odd, "unknown" is appended as the value -// for the last key to ensure all labels have values. -// -// Example: -// -// var lvs lv.LabelValues -// lvs1 := lvs.With("method", "GET") -// lvs2 := lvs1.With("status", "200") -// // lvs1 and lvs2 are independent func (lvs LabelValues) With(labelValues ...string) LabelValues { if len(labelValues)%2 != 0 { labelValues = append(labelValues, "unknown") diff --git a/observability/metrics/metrics.go b/observability/metrics/metrics.go index c92e8ab385..aacf2102f0 100644 --- a/observability/metrics/metrics.go +++ b/observability/metrics/metrics.go @@ -20,6 +20,15 @@ package metrics +// QuantilesP50P90P99 defines standard quantiles for Summary metrics. +// +//nolint:gochecknoglobals,mnd // standard quantile definitions +var QuantilesP50P90P99 = map[float64]float64{ + 0.5: 0.05, // p50 (median) with ±5% error + 0.9: 0.01, // p90 with ±1% error + 0.99: 0.001, // p99 with ±0.1% error +} + // Counter represents a monotonically increasing metric. type Counter interface { // Add increments the counter by the given delta. @@ -65,29 +74,52 @@ type Histogram interface { With(labelValues ...string) Histogram } +// Summary represents a metric that samples observations and calculates +// configurable quantiles over a sliding time window. Summaries are used +// for tracking distributions similar to histograms but with pre-calculated +// quantiles. +// +// A summary automatically provides: +// - Sum of all observed values +// - Count of observations +// - Pre-defined quantiles (e.g., 0.5, 0.9, 0.99) +type Summary interface { + // Observe adds a single observation to the summary. + Observe(value float64) + + // With returns a new Summary with the given label values applied. + // Label values are provided as key-value pairs. + // If the number of label values is odd, "unknown" is appended. + With(labelValues ...string) Summary +} + // CounterOpts defines options for creating a counter metric. 
type CounterOpts struct { - Subsystem string - Name string - Help string + Name string + Help string } // GaugeOpts defines options for creating a gauge metric. type GaugeOpts struct { - Subsystem string - Name string - Help string + Name string + Help string } // HistogramOpts defines options for creating a histogram metric. type HistogramOpts struct { - Subsystem string - Name string - Help string - Buckets []float64 + Name string + Help string + Buckets []float64 } -// Factory creates metrics instances (Counter, Gauge, Histogram). +// SummaryOpts defines options for creating a summary metric. +type SummaryOpts struct { + Name string + Help string + Objectives map[float64]float64 // Quantile ranks to track (e.g., 0.5, 0.9, 0.99) +} + +// Factory creates metrics instances (Counter, Gauge, Histogram, Summary). // Implementations include PrometheusFactory and NoOpFactory. type Factory interface { // NewCounter creates a new Counter with the given options and label names. @@ -98,4 +130,7 @@ type Factory interface { // NewHistogram creates a new Histogram with the given options and label names. NewHistogram(opts HistogramOpts, labelNames []string) Histogram + + // NewSummary creates a new Summary with the given options and label names. + NewSummary(opts SummaryOpts, labelNames []string) Summary } diff --git a/observability/metrics/prometheus/prometheus.go b/observability/metrics/prometheus/prometheus.go index 597a515dd3..dd0eaea530 100644 --- a/observability/metrics/prometheus/prometheus.go +++ b/observability/metrics/prometheus/prometheus.go @@ -32,15 +32,6 @@ type Factory struct { } // NewFactory creates a new Prometheus metrics factory with the given namespace. 
-// -// Example: -// -// factory := prometheus.NewFactory("beacon_kit") -// counter := factory.NewCounter(metrics.CounterOpts{ -// Subsystem: "blockchain", -// Name: "blocks_total", -// Help: "Total number of blocks processed", -// }, []string{"status"}) func NewFactory(namespace string) metrics.Factory { return &Factory{namespace: namespace} } @@ -49,7 +40,6 @@ func NewFactory(namespace string) metrics.Factory { func (f *Factory) NewCounter(opts metrics.CounterOpts, labelNames []string) metrics.Counter { return NewCounter(prometheus.CounterOpts{ Namespace: f.namespace, - Subsystem: opts.Subsystem, Name: opts.Name, Help: opts.Help, }, labelNames) @@ -59,7 +49,6 @@ func (f *Factory) NewCounter(opts metrics.CounterOpts, labelNames []string) metr func (f *Factory) NewGauge(opts metrics.GaugeOpts, labelNames []string) metrics.Gauge { return NewGauge(prometheus.GaugeOpts{ Namespace: f.namespace, - Subsystem: opts.Subsystem, Name: opts.Name, Help: opts.Help, }, labelNames) @@ -69,13 +58,22 @@ func (f *Factory) NewGauge(opts metrics.GaugeOpts, labelNames []string) metrics. func (f *Factory) NewHistogram(opts metrics.HistogramOpts, labelNames []string) metrics.Histogram { return NewHistogram(prometheus.HistogramOpts{ Namespace: f.namespace, - Subsystem: opts.Subsystem, Name: opts.Name, Help: opts.Help, Buckets: opts.Buckets, }, labelNames) } +// NewSummary creates a new Summary that registers with prometheus.DefaultRegisterer. +func (f *Factory) NewSummary(opts metrics.SummaryOpts, labelNames []string) metrics.Summary { + return NewSummary(prometheus.SummaryOpts{ + Namespace: f.namespace, + Name: opts.Name, + Help: opts.Help, + Objectives: opts.Objectives, + }, labelNames) +} + // counter wraps a prometheus.CounterVec and implements the metrics.Counter interface. type counter struct { cv *prometheus.CounterVec @@ -83,15 +81,6 @@ type counter struct { } // NewCounter creates a new Counter that registers with prometheus.DefaultRegisterer. 
-// -// Example: -// -// c := prometheus.NewCounter(prometheus.CounterOpts{ -// Namespace: "beacon_kit", -// Subsystem: "blockchain", -// Name: "total_blocks", -// Help: "Total number of blocks processed", -// }, []string{"status"}) func NewCounter(opts prometheus.CounterOpts, labelNames []string) metrics.Counter { cv := prometheus.NewCounterVec(opts, labelNames) prometheus.MustRegister(cv) @@ -127,15 +116,6 @@ type gauge struct { } // NewGauge creates a new Gauge that registers with prometheus.DefaultRegisterer. -// -// Example: -// -// g := prometheus.NewGauge(prometheus.GaugeOpts{ -// Namespace: "beacon_kit", -// Subsystem: "blockchain", -// Name: "queue_depth", -// Help: "Current depth of the processing queue", -// }, []string{"queue_name"}) func NewGauge(opts prometheus.GaugeOpts, labelNames []string) metrics.Gauge { gv := prometheus.NewGaugeVec(opts, labelNames) prometheus.MustRegister(gv) @@ -176,16 +156,6 @@ type histogram struct { } // NewHistogram creates a new Histogram that registers with prometheus.DefaultRegisterer. -// -// Example: -// -// h := prometheus.NewHistogram(prometheus.HistogramOpts{ -// Namespace: "beacon_kit", -// Subsystem: "blockchain", -// Name: "block_processing_duration_seconds", -// Help: "Time spent processing blocks", -// Buckets: prometheus.ExponentialBucketsRange(0.001, 10, 10), -// }, []string{"block_type"}) func NewHistogram(opts prometheus.HistogramOpts, labelNames []string) metrics.Histogram { hv := prometheus.NewHistogramVec(opts, labelNames) prometheus.MustRegister(hv) @@ -214,6 +184,41 @@ func (h *histogram) Observe(value float64) { h.hv.With(makeLabels(h.lvs...)).Observe(value) } +// summary wraps a prometheus.SummaryVec and implements the metrics.Summary interface. +type summary struct { + sv *prometheus.SummaryVec + lvs lv.LabelValues +} + +// NewSummary creates a new Summary that registers with prometheus.DefaultRegisterer. 
+func NewSummary(opts prometheus.SummaryOpts, labelNames []string) metrics.Summary { + sv := prometheus.NewSummaryVec(opts, labelNames) + prometheus.MustRegister(sv) + return &summary{sv: sv} +} + +// NewSummaryFrom creates a new Summary from an existing prometheus.SummaryVec. +// The SummaryVec must already be registered. +func NewSummaryFrom(sv *prometheus.SummaryVec, labelValues ...string) metrics.Summary { + return &summary{ + sv: sv, + lvs: lv.LabelValues(labelValues), + } +} + +// With returns a new Summary with the given label values applied. +func (s *summary) With(labelValues ...string) metrics.Summary { + return &summary{ + sv: s.sv, + lvs: s.lvs.With(labelValues...), + } +} + +// Observe adds a single observation to the summary. +func (s *summary) Observe(value float64) { + s.sv.With(makeLabels(s.lvs...)).Observe(value) +} + // makeLabels converts a slice of label key-value pairs into a prometheus.Labels map. // The input slice should contain alternating keys and values. // diff --git a/state-transition/core/metrics.go b/state-transition/core/metrics.go index cde0690a62..4ed9d918c7 100644 --- a/state-transition/core/metrics.go +++ b/state-transition/core/metrics.go @@ -43,65 +43,57 @@ func NewMetrics(factory metrics.Factory) *Metrics { return &Metrics{ BlockTxGasUsed: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "state", - Name: "block_tx_gas_used", - Help: "Transaction gas used in the block", + Name: "beacon_kit_state_block_tx_gas_used", + Help: "Transaction gas used in the block", }, []string{"block_number"}, ), BlockBlobGasUsed: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "state", - Name: "block_blob_gas_used", - Help: "Blob gas used in the block", + Name: "beacon_kit_state_block_blob_gas_used", + Help: "Blob gas used in the block", }, []string{"block_number"}, ), PartialWithdrawalsEnqueued: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "state", - Name: "partial_withdrawals_enqueued", - Help: "Number of partial withdrawals enqueued", + 
Name: "beacon_kit_state_partial_withdrawals_enqueued", + Help: "Number of partial withdrawals enqueued", }, nil, ), PayloadConsensusTimestampDiff: factory.NewGauge( metrics.GaugeOpts{ - Subsystem: "state", - Name: "payload_consensus_timestamp_diff", - Help: "Difference between payload timestamp and consensus timestamp", + Name: "beacon_kit_state_payload_consensus_timestamp_diff", + Help: "Difference between payload timestamp and consensus timestamp", }, nil, ), DepositStakeLost: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "state", - Name: "deposit_stake_lost", - Help: "Number of deposits with stake lost", + Name: "beacon_kit_state_deposit_stake_lost", + Help: "Number of deposits with stake lost", }, nil, ), PartialWithdrawalRequestDropped: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "state", - Name: "partial_withdrawal_request_dropped", - Help: "Number of partial withdrawal requests dropped", + Name: "beacon_kit_state_partial_withdrawal_request_dropped", + Help: "Number of partial withdrawal requests dropped", }, nil, ), PartialWithdrawalRequestInvalid: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "state", - Name: "partial_withdrawal_request_invalid", - Help: "Number of invalid partial withdrawal requests", + Name: "beacon_kit_state_partial_withdrawal_request_invalid", + Help: "Number of invalid partial withdrawal requests", }, nil, ), ValidatorNotWithdrawable: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "state", - Name: "validator_not_withdrawable", - Help: "Number of validators not withdrawable", + Name: "beacon_kit_state_validator_not_withdrawable", + Help: "Number of validators not withdrawable", }, nil, ), diff --git a/state-transition/core/state/metrics.go b/state-transition/core/state/metrics.go index 73089d378a..3a4f2c9ab9 100644 --- a/state-transition/core/state/metrics.go +++ b/state-transition/core/state/metrics.go @@ -39,17 +39,15 @@ func NewMetrics(factory metrics.Factory) *Metrics { return &Metrics{ 
PartialWithdrawalRequestInvalid: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "statedb", - Name: "partial_withdrawal_request_invalid", - Help: "Number of invalid partial withdrawal requests", + Name: "beacon_kit_statedb_partial_withdrawal_request_invalid", + Help: "Number of invalid partial withdrawal requests", }, nil, ), ExcessStakePartialWithdrawal: factory.NewCounter( metrics.CounterOpts{ - Subsystem: "statedb", - Name: "excess_stake_partial_withdrawal", - Help: "Number of withdrawals created due to validator stake exceeding MaxEffectiveBalance", + Name: "beacon_kit_statedb_excess_stake_partial_withdrawal", + Help: "Number of withdrawals created due to validator stake exceeding MaxEffectiveBalance", }, nil, ), diff --git a/testing/networks/80069/app.toml b/testing/networks/80069/app.toml index b82f404379..e94654c7c8 100644 --- a/testing/networks/80069/app.toml +++ b/testing/networks/80069/app.toml @@ -78,6 +78,9 @@ app-db-backend = "pebbledb" # Default: false enabled = true +# ServiceName defines the namespace prefix for all Prometheus metrics. +service-name = "beacond_node" + ############################################################################### ### BeaconKit ### ############################################################################### diff --git a/testing/networks/80094/app.toml b/testing/networks/80094/app.toml index 55af21f89a..b0825c0728 100644 --- a/testing/networks/80094/app.toml +++ b/testing/networks/80094/app.toml @@ -78,6 +78,9 @@ app-db-backend = "pebbledb" # Default: false enabled = true +# ServiceName defines the namespace prefix for all Prometheus metrics. 
+service-name = "beacond_node" + ############################################################################### ### BeaconKit ### ############################################################################### From 0a4a541ccbf18e954dd420c46c86b8204a58e569 Mon Sep 17 00:00:00 2001 From: Fridrik Asmundsson Date: Fri, 24 Oct 2025 17:53:38 +0000 Subject: [PATCH 3/5] Add support for hostname and global constant labels in Prometheus metrics --- config/config/config.go | 12 ++++- config/config/config.toml.tpl | 11 ++-- node-core/components/metrics_factory.go | 51 +++++++++++++++++-- .../metrics/prometheus/prometheus.go | 49 ++++++++++++------ testing/networks/80069/app.toml | 9 ++-- testing/networks/80094/app.toml | 9 ++-- 6 files changed, 111 insertions(+), 30 deletions(-) diff --git a/config/config/config.go b/config/config/config.go index 792d1652c1..aefcfdff24 100644 --- a/config/config/config.go +++ b/config/config/config.go @@ -85,6 +85,12 @@ type TelemetryConfig struct { // ServiceName defines the namespace for all Prometheus metrics. ServiceName string `mapstructure:"service-name"` + + // EnableHostnameLabel enables adding hostname as a constant label to all metrics. + EnableHostnameLabel bool `mapstructure:"enable-hostname-label"` + + // GlobalLabels defines a global set of name/value label tuples applied to all metrics. + GlobalLabels [][]string `mapstructure:"global-labels"` } // Config defines the server's top level configuration. 
@@ -109,8 +115,10 @@ func DefaultConfig() *Config { IAVLDisableFastNode: false, }, Telemetry: TelemetryConfig{ - Enabled: false, - ServiceName: "beacond_node", + Enabled: true, + ServiceName: "", + EnableHostnameLabel: false, + GlobalLabels: [][]string{}, }, } } diff --git a/config/config/config.toml.tpl b/config/config/config.toml.tpl index 77b3b16a1d..095364f99c 100644 --- a/config/config/config.toml.tpl +++ b/config/config/config.toml.tpl @@ -62,10 +62,15 @@ iavl-disable-fastnode = {{ .BaseConfig.IAVLDisableFastNode }} [telemetry] # Enabled enables Prometheus metrics collection for all beacon-kit services. -# When true, metrics are exposed at the configured Prometheus endpoint. -# When false, all metrics are no-op with zero runtime overhead. -# Default: false enabled = {{ .Telemetry.Enabled }} # ServiceName defines the namespace prefix for all Prometheus metrics. service-name = "{{ .Telemetry.ServiceName }}" + +# EnableHostnameLabel enables adding hostname as a constant label to all metrics. +enable-hostname-label = {{ .Telemetry.EnableHostnameLabel }} + +# GlobalLabels defines a global set of name/value label tuples applied to all metrics. 
+{{ if gt (len .Telemetry.GlobalLabels) 0 }}global-labels = [{{ range $k, $v := .Telemetry.GlobalLabels }} + ["{{index $v 0 }}", "{{ index $v 1}}"],{{ end }} +]{{ else }}global-labels = []{{ end }} diff --git a/node-core/components/metrics_factory.go b/node-core/components/metrics_factory.go index 0dbea28af2..de8f1f83f5 100644 --- a/node-core/components/metrics_factory.go +++ b/node-core/components/metrics_factory.go @@ -21,16 +21,21 @@ package components import ( + "os" + "cosmossdk.io/depinject" "github.com/berachain/beacon-kit/config/config" + "github.com/berachain/beacon-kit/log/phuslu" "github.com/berachain/beacon-kit/observability/metrics" "github.com/berachain/beacon-kit/observability/metrics/discard" "github.com/berachain/beacon-kit/observability/metrics/prometheus" + promlib "github.com/prometheus/client_golang/prometheus" ) type MetricsFactoryInput struct { depinject.In Config *config.Config + Logger *phuslu.Logger } // ProvideMetricsFactory provides a metrics factory based on configuration. @@ -38,8 +43,48 @@ type MetricsFactoryInput struct { // When false, creates no-op metrics with zero runtime overhead. // This setting affects ALL metrics in the beacon-kit system. func ProvideMetricsFactory(in MetricsFactoryInput) metrics.Factory { - if in.Config.Telemetry.Enabled { - return prometheus.NewFactory(in.Config.Telemetry.ServiceName) + if !in.Config.Telemetry.Enabled { + return discard.NewFactory() + } + + // Build constant labels from config + constLabels := buildConstLabels(in.Config.Telemetry, in.Logger) + + // If we have any constant labels, create factory with labels + if len(constLabels) > 0 { + return prometheus.NewFactoryWithLabels( + in.Config.Telemetry.ServiceName, + constLabels, + ) + } + + return prometheus.NewFactory(in.Config.Telemetry.ServiceName) +} + +// buildConstLabels builds constant labels from telemetry config. +// It merges hostname label (if enabled) with global labels. 
+func buildConstLabels(cfg config.TelemetryConfig, logger *phuslu.Logger) promlib.Labels { + labels := make(promlib.Labels) + + // Add global labels from config + for _, labelPair := range cfg.GlobalLabels { + //nolint:mnd // label pairs are always [key, value] + if len(labelPair) != 2 { + logger.Warn("Invalid global label pair in telemetry config, must be [key, value]", "pair", labelPair) + continue + } + labels[labelPair[0]] = labelPair[1] } - return discard.NewFactory() + + // Add hostname label if enabled (takes precedence over global-labels) + if cfg.EnableHostnameLabel { + hostname, err := os.Hostname() + if err != nil { + logger.Warn("Failed to get hostname for metrics labels, continuing without hostname label", "error", err) + } else { + labels["host"] = hostname + } + } + + return labels } diff --git a/observability/metrics/prometheus/prometheus.go b/observability/metrics/prometheus/prometheus.go index dd0eaea530..dac74a205b 100644 --- a/observability/metrics/prometheus/prometheus.go +++ b/observability/metrics/prometheus/prometheus.go @@ -28,49 +28,66 @@ import ( // Factory creates Prometheus metrics and registers them with prometheus.DefaultRegisterer. type Factory struct { - namespace string + namespace string + constLabels prometheus.Labels } // NewFactory creates a new Prometheus metrics factory with the given namespace. func NewFactory(namespace string) metrics.Factory { - return &Factory{namespace: namespace} + return &Factory{ + namespace: namespace, + constLabels: nil, + } +} + +// NewFactoryWithLabels creates a new Prometheus metrics factory with constant labels. +// Constant labels are applied to all metrics created by this factory. +func NewFactoryWithLabels(namespace string, constLabels prometheus.Labels) metrics.Factory { + return &Factory{ + namespace: namespace, + constLabels: constLabels, + } } // NewCounter creates a new Counter that registers with prometheus.DefaultRegisterer. 
func (f *Factory) NewCounter(opts metrics.CounterOpts, labelNames []string) metrics.Counter { return NewCounter(prometheus.CounterOpts{ - Namespace: f.namespace, - Name: opts.Name, - Help: opts.Help, + Namespace: f.namespace, + Name: opts.Name, + Help: opts.Help, + ConstLabels: f.constLabels, }, labelNames) } // NewGauge creates a new Gauge that registers with prometheus.DefaultRegisterer. func (f *Factory) NewGauge(opts metrics.GaugeOpts, labelNames []string) metrics.Gauge { return NewGauge(prometheus.GaugeOpts{ - Namespace: f.namespace, - Name: opts.Name, - Help: opts.Help, + Namespace: f.namespace, + Name: opts.Name, + Help: opts.Help, + ConstLabels: f.constLabels, }, labelNames) } // NewHistogram creates a new Histogram that registers with prometheus.DefaultRegisterer. func (f *Factory) NewHistogram(opts metrics.HistogramOpts, labelNames []string) metrics.Histogram { return NewHistogram(prometheus.HistogramOpts{ - Namespace: f.namespace, - Name: opts.Name, - Help: opts.Help, - Buckets: opts.Buckets, + Namespace: f.namespace, + Name: opts.Name, + Help: opts.Help, + Buckets: opts.Buckets, + ConstLabels: f.constLabels, }, labelNames) } // NewSummary creates a new Summary that registers with prometheus.DefaultRegisterer. func (f *Factory) NewSummary(opts metrics.SummaryOpts, labelNames []string) metrics.Summary { return NewSummary(prometheus.SummaryOpts{ - Namespace: f.namespace, - Name: opts.Name, - Help: opts.Help, - Objectives: opts.Objectives, + Namespace: f.namespace, + Name: opts.Name, + Help: opts.Help, + Objectives: opts.Objectives, + ConstLabels: f.constLabels, }, labelNames) } diff --git a/testing/networks/80069/app.toml b/testing/networks/80069/app.toml index e94654c7c8..1d459eff4f 100644 --- a/testing/networks/80069/app.toml +++ b/testing/networks/80069/app.toml @@ -73,14 +73,17 @@ app-db-backend = "pebbledb" [telemetry] # Enabled enables Prometheus metrics collection for all beacon-kit services. 
-# When true, metrics are exposed at the configured Prometheus endpoint. -# When false, all metrics are no-op with zero runtime overhead. -# Default: false enabled = true # ServiceName defines the namespace prefix for all Prometheus metrics. service-name = "beacond_node" +# EnableHostnameLabel enables adding hostname as a constant label to all metrics. +enable-hostname-label = true + +# GlobalLabels defines a global set of name/value label tuples applied to all metrics. +global-labels = [] + ############################################################################### ### BeaconKit ### ############################################################################### diff --git a/testing/networks/80094/app.toml b/testing/networks/80094/app.toml index b0825c0728..c173c32277 100644 --- a/testing/networks/80094/app.toml +++ b/testing/networks/80094/app.toml @@ -73,14 +73,17 @@ app-db-backend = "pebbledb" [telemetry] # Enabled enables Prometheus metrics collection for all beacon-kit services. -# When true, metrics are exposed at the configured Prometheus endpoint. -# When false, all metrics are no-op with zero runtime overhead. -# Default: false enabled = true # ServiceName defines the namespace prefix for all Prometheus metrics. service-name = "beacond_node" +# EnableHostnameLabel enables adding hostname as a constant label to all metrics. +enable-hostname-label = true + +# GlobalLabels defines a global set of name/value label tuples applied to all metrics. 
+global-labels = [] + ############################################################################### ### BeaconKit ### ############################################################################### From 319552c3c2fb223395683d381b46371d8ed52b0d Mon Sep 17 00:00:00 2001 From: Fridrik Asmundsson Date: Mon, 27 Oct 2025 14:32:39 +0000 Subject: [PATCH 4/5] Keep duration as milliseconds for compat (though seconds are the norm) --- beacon/blockchain/metrics.go | 8 ++++---- beacon/validator/metrics.go | 8 ++++---- consensus/cometbft/service/metrics.go | 12 ++++++------ da/blob/factory_metrics.go | 16 ++++++++-------- da/blob/processor_metrics.go | 8 ++++---- da/blob/verifier_metrics.go | 12 ++++++------ da/blobreactor/metrics.go | 4 ++-- execution/client/metrics.go | 12 ++++++------ 8 files changed, 40 insertions(+), 40 deletions(-) diff --git a/beacon/blockchain/metrics.go b/beacon/blockchain/metrics.go index 9752bf76bf..996df774c2 100644 --- a/beacon/blockchain/metrics.go +++ b/beacon/blockchain/metrics.go @@ -46,7 +46,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { StateTransitionDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_beacon_blockchain_state_transition_duration", - Help: "Time taken to process state transition in seconds", + Help: "Time taken to process state transition in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -82,7 +82,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { StateRootVerificationDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_blockchain_state_root_verification_duration", - Help: "Time taken to verify state root in seconds", + Help: "Time taken to verify state root in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -106,7 +106,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { // measureStateTransitionDuration measures the time to process the state transition for a block. 
func (m *Metrics) measureStateTransitionDuration(start time.Time) { - m.StateTransitionDuration.Observe(time.Since(start).Seconds()) + m.StateTransitionDuration.Observe(float64(time.Since(start).Milliseconds())) } // markRebuildPayloadForRejectedBlockSuccess increments the counter for the number of times @@ -138,5 +138,5 @@ func (m *Metrics) markOptimisticPayloadBuildFailure(slot math.Slot, err error) { // measureStateRootVerificationTime measures the time taken to verify the state root of a block. // It records the duration from the provided start time to the current time. func (m *Metrics) measureStateRootVerificationTime(start time.Time) { - m.StateRootVerificationDuration.Observe(time.Since(start).Seconds()) + m.StateRootVerificationDuration.Observe(float64(time.Since(start).Milliseconds())) } diff --git a/beacon/validator/metrics.go b/beacon/validator/metrics.go index 40ceadb77b..d85a9f87d9 100644 --- a/beacon/validator/metrics.go +++ b/beacon/validator/metrics.go @@ -48,7 +48,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { RequestBlockForProposalDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_validator_request_block_for_proposal_duration", - Help: "Time taken to request block for proposal in seconds", + Help: "Time taken to request block for proposal in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -56,7 +56,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { StateRootComputationDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_validator_state_root_computation_duration", - Help: "Time taken to compute state root in seconds", + Help: "Time taken to compute state root in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -73,12 +73,12 @@ func NewMetrics(factory metrics.Factory) *Metrics { // measureRequestBlockForProposalTime measures the time taken to request block for proposal. 
func (m *Metrics) measureRequestBlockForProposalTime(start time.Time) { - m.RequestBlockForProposalDuration.Observe(time.Since(start).Seconds()) + m.RequestBlockForProposalDuration.Observe(float64(time.Since(start).Milliseconds())) } // measureStateRootComputationTime measures the time taken to compute the state root of a block. func (m *Metrics) measureStateRootComputationTime(start time.Time) { - m.StateRootComputationDuration.Observe(time.Since(start).Seconds()) + m.StateRootComputationDuration.Observe(float64(time.Since(start).Milliseconds())) } // failedToRetrievePayload increments the counter for the number of times the validator diff --git a/consensus/cometbft/service/metrics.go b/consensus/cometbft/service/metrics.go index 067a42df15..83c9a427a0 100644 --- a/consensus/cometbft/service/metrics.go +++ b/consensus/cometbft/service/metrics.go @@ -55,7 +55,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { QueryDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_comet_query_duration", - Help: "Time taken to process ABCI queries in seconds", + Help: "Time taken to process ABCI queries in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"path"}, @@ -63,7 +63,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { PrepareProposalDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_runtime_prepare_proposal_duration", - Help: "Time taken to prepare a proposal in seconds", + Help: "Time taken to prepare a proposal in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -71,7 +71,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { ProcessProposalDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_runtime_process_proposal_duration", - Help: "Time taken to process a proposal in seconds", + Help: "Time taken to process a proposal in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -81,15 +81,15 @@ func NewMetrics(factory metrics.Factory) *Metrics { // 
measureQueryDuration is a helper to measure query duration. func (m *Metrics) measureQueryDuration(start time.Time, path string) { - m.QueryDuration.With("path", path).Observe(time.Since(start).Seconds()) + m.QueryDuration.With("path", path).Observe(float64(time.Since(start).Milliseconds())) } // measurePrepareProposalDuration is a helper to measure prepare proposal duration. func (m *Metrics) measurePrepareProposalDuration(start time.Time) { - m.PrepareProposalDuration.Observe(time.Since(start).Seconds()) + m.PrepareProposalDuration.Observe(float64(time.Since(start).Milliseconds())) } // measureProcessProposalDuration is a helper to measure process proposal duration. func (m *Metrics) measureProcessProposalDuration(start time.Time) { - m.ProcessProposalDuration.Observe(time.Since(start).Seconds()) + m.ProcessProposalDuration.Observe(float64(time.Since(start).Milliseconds())) } diff --git a/da/blob/factory_metrics.go b/da/blob/factory_metrics.go index 59382e9be0..18c45e4298 100644 --- a/da/blob/factory_metrics.go +++ b/da/blob/factory_metrics.go @@ -42,7 +42,7 @@ func NewFactoryMetrics(factory metrics.Factory) *FactoryMetrics { BuildSidecarDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_factory_build_sidecar_duration", - Help: "Time taken to build blob sidecars in seconds", + Help: "Time taken to build blob sidecars in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, @@ -50,7 +50,7 @@ func NewFactoryMetrics(factory metrics.Factory) *FactoryMetrics { BuildKZGInclusionProofDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_factory_build_kzg_inclusion_proof_duration", - Help: "Time taken to build KZG inclusion proof in seconds", + Help: "Time taken to build KZG inclusion proof in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -58,7 +58,7 @@ func NewFactoryMetrics(factory metrics.Factory) *FactoryMetrics { BuildBlockBodyProofDuration: factory.NewSummary( 
metrics.SummaryOpts{ Name: "beacon_kit_da_blob_factory_build_block_body_proof_duration", - Help: "Time taken to build block body proof in seconds", + Help: "Time taken to build block body proof in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -66,7 +66,7 @@ func NewFactoryMetrics(factory metrics.Factory) *FactoryMetrics { BuildCommitmentProofDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_factory_build_commitment_proof_duration", - Help: "Time taken to build commitment proof in seconds", + Help: "Time taken to build commitment proof in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -78,26 +78,26 @@ func NewFactoryMetrics(factory metrics.Factory) *FactoryMetrics { func (m *FactoryMetrics) measureBuildSidecarsDuration( startTime time.Time, numSidecars math.U64, ) { - m.BuildSidecarDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) + m.BuildSidecarDuration.With("num_sidecars", numSidecars.Base10()).Observe(float64(time.Since(startTime).Milliseconds())) } // measureBuildKZGInclusionProofDuration measures the duration of the build KZG inclusion proof. func (m *FactoryMetrics) measureBuildKZGInclusionProofDuration( startTime time.Time, ) { - m.BuildKZGInclusionProofDuration.Observe(time.Since(startTime).Seconds()) + m.BuildKZGInclusionProofDuration.Observe(float64(time.Since(startTime).Milliseconds())) } // measureBuildBlockBodyProofDuration measures the duration of the build block body proof. func (m *FactoryMetrics) measureBuildBlockBodyProofDuration( startTime time.Time, ) { - m.BuildBlockBodyProofDuration.Observe(time.Since(startTime).Seconds()) + m.BuildBlockBodyProofDuration.Observe(float64(time.Since(startTime).Milliseconds())) } // measureBuildCommitmentProofDuration measures the duration of the build commitment proof. 
func (m *FactoryMetrics) measureBuildCommitmentProofDuration( startTime time.Time, ) { - m.BuildCommitmentProofDuration.Observe(time.Since(startTime).Seconds()) + m.BuildCommitmentProofDuration.Observe(float64(time.Since(startTime).Milliseconds())) } diff --git a/da/blob/processor_metrics.go b/da/blob/processor_metrics.go index 64ce6836e0..f3a19e7158 100644 --- a/da/blob/processor_metrics.go +++ b/da/blob/processor_metrics.go @@ -40,7 +40,7 @@ func NewProcessorMetrics(factory metrics.Factory) *ProcessorMetrics { VerifyBlobsDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_processor_verify_blobs_duration", - Help: "Time taken to verify blob sidecars in seconds", + Help: "Time taken to verify blob sidecars in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, @@ -48,7 +48,7 @@ func NewProcessorMetrics(factory metrics.Factory) *ProcessorMetrics { ProcessBlobDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_processor_process_blob_duration", - Help: "Time taken to process blob sidecars in seconds", + Help: "Time taken to process blob sidecars in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, @@ -61,7 +61,7 @@ func (m *ProcessorMetrics) measureVerifySidecarsDuration( startTime time.Time, numSidecars math.U64, ) { - m.VerifyBlobsDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) + m.VerifyBlobsDuration.With("num_sidecars", numSidecars.Base10()).Observe(float64(time.Since(startTime).Milliseconds())) } // measureProcessSidecarsDuration measures the duration of the blob processing. 
@@ -69,5 +69,5 @@ func (m *ProcessorMetrics) measureProcessSidecarsDuration( startTime time.Time, numSidecars math.U64, ) { - m.ProcessBlobDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) + m.ProcessBlobDuration.With("num_sidecars", numSidecars.Base10()).Observe(float64(time.Since(startTime).Milliseconds())) } diff --git a/da/blob/verifier_metrics.go b/da/blob/verifier_metrics.go index 89b570cbce..f1a7ec2456 100644 --- a/da/blob/verifier_metrics.go +++ b/da/blob/verifier_metrics.go @@ -41,7 +41,7 @@ func NewVerifierMetrics(factory metrics.Factory) *VerifierMetrics { VerifyBlobsDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_verifier_verify_blobs_duration", - Help: "Time taken to verify blobs in seconds", + Help: "Time taken to verify blobs in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars", "kzg_implementation"}, @@ -49,7 +49,7 @@ func NewVerifierMetrics(factory metrics.Factory) *VerifierMetrics { VerifyInclusionProofsDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_verifier_verify_inclusion_proofs_duration", - Help: "Time taken to verify inclusion proofs in seconds", + Help: "Time taken to verify inclusion proofs in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars"}, @@ -57,7 +57,7 @@ func NewVerifierMetrics(factory metrics.Factory) *VerifierMetrics { VerifyKZGProofsDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_da_blob_verifier_verify_kzg_proofs_duration", - Help: "Time taken to verify KZG proofs in seconds", + Help: "Time taken to verify KZG proofs in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"num_sidecars", "kzg_implementation"}, @@ -74,7 +74,7 @@ func (m *VerifierMetrics) measureVerifySidecarsDuration( m.VerifyBlobsDuration.With( "num_sidecars", numSidecars.Base10(), "kzg_implementation", kzgImplementation, - 
).Observe(time.Since(startTime).Seconds()) + ).Observe(float64(time.Since(startTime).Milliseconds())) } // measureVerifyInclusionProofsDuration measures the duration of the inclusion proofs verification. @@ -82,7 +82,7 @@ func (m *VerifierMetrics) measureVerifyInclusionProofsDuration( startTime time.Time, numSidecars math.U64, ) { - m.VerifyInclusionProofsDuration.With("num_sidecars", numSidecars.Base10()).Observe(time.Since(startTime).Seconds()) + m.VerifyInclusionProofsDuration.With("num_sidecars", numSidecars.Base10()).Observe(float64(time.Since(startTime).Milliseconds())) } // measureVerifyKZGProofsDuration measures the duration of the KZG proofs verification. @@ -94,5 +94,5 @@ func (m *VerifierMetrics) measureVerifyKZGProofsDuration( m.VerifyKZGProofsDuration.With( "num_sidecars", numSidecars.Base10(), "kzg_implementation", kzgImplementation, - ).Observe(time.Since(startTime).Seconds()) + ).Observe(float64(time.Since(startTime).Milliseconds())) } diff --git a/da/blobreactor/metrics.go b/da/blobreactor/metrics.go index ff65d067ad..480f62082a 100644 --- a/da/blobreactor/metrics.go +++ b/da/blobreactor/metrics.go @@ -65,7 +65,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { RequestDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_blobreactor_request_duration", - Help: "Time taken to complete blob requests in seconds", + Help: "Time taken to complete blob requests in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, []string{"status"}, @@ -111,7 +111,7 @@ func NewMetrics(factory metrics.Factory) *Metrics { // recordOverallRequestComplete records completion of entire blob request (may try multiple peers). 
func (m *Metrics) recordOverallRequestComplete(status string, start time.Time) { m.RequestTotal.With("status", status).Add(1) - m.RequestDuration.With("status", status).Observe(time.Since(start).Seconds()) + m.RequestDuration.With("status", status).Observe(float64(time.Since(start).Milliseconds())) } // recordPeerAttempt records a single peer attempt with status (no duration to avoid high cardinality). diff --git a/execution/client/metrics.go b/execution/client/metrics.go index 7f3e18a940..3ce5c3cc62 100644 --- a/execution/client/metrics.go +++ b/execution/client/metrics.go @@ -66,7 +66,7 @@ func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { ForkchoiceUpdateDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_execution_client_forkchoice_update_duration", - Help: "Time taken for forkchoice update in seconds", + Help: "Time taken for forkchoice update in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -74,7 +74,7 @@ func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { NewPayloadDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_execution_client_new_payload_duration", - Help: "Time taken for new payload in seconds", + Help: "Time taken for new payload in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -82,7 +82,7 @@ func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { GetPayloadDuration: factory.NewSummary( metrics.SummaryOpts{ Name: "beacon_kit_execution_client_get_payload_duration", - Help: "Time taken for get payload in seconds", + Help: "Time taken for get payload in milliseconds", Objectives: metrics.QuantilesP50P90P99, }, nil, @@ -203,17 +203,17 @@ func NewMetrics(factory metrics.Factory, logger log.Logger) *Metrics { // measureForkchoiceUpdateDuration measures the duration of the forkchoice update. 
func (m *Metrics) measureForkchoiceUpdateDuration(startTime time.Time) { - m.ForkchoiceUpdateDuration.Observe(time.Since(startTime).Seconds()) + m.ForkchoiceUpdateDuration.Observe(float64(time.Since(startTime).Milliseconds())) } // measureNewPayloadDuration measures the duration of the new payload. func (m *Metrics) measureNewPayloadDuration(startTime time.Time) { - m.NewPayloadDuration.Observe(time.Since(startTime).Seconds()) + m.NewPayloadDuration.Observe(float64(time.Since(startTime).Milliseconds())) } // measureGetPayloadDuration measures the duration of the get payload. func (m *Metrics) measureGetPayloadDuration(startTime time.Time) { - m.GetPayloadDuration.Observe(time.Since(startTime).Seconds()) + m.GetPayloadDuration.Observe(float64(time.Since(startTime).Milliseconds())) } // incrementEngineAPITimeout increments the timeout counter for general engine api timeouts. From 5fe4aab64b2d0f731f2013f8dd37969d61a20d2e Mon Sep 17 00:00:00 2001 From: Fridrik Asmundsson Date: Mon, 27 Oct 2025 14:52:04 +0000 Subject: [PATCH 5/5] Inject prometheus.Registerer into metrics factory for test isolation --- node-core/components/metrics_factory.go | 15 ++++- node-core/components/metrics_providers.go | 3 +- .../metrics/prometheus/prometheus.go | 61 +++++++++++-------- 3 files changed, 50 insertions(+), 29 deletions(-) diff --git a/node-core/components/metrics_factory.go b/node-core/components/metrics_factory.go index de8f1f83f5..15482c069e 100644 --- a/node-core/components/metrics_factory.go +++ b/node-core/components/metrics_factory.go @@ -34,8 +34,16 @@ import ( type MetricsFactoryInput struct { depinject.In - Config *config.Config - Logger *phuslu.Logger + + Registerer promlib.Registerer + Config *config.Config + Logger *phuslu.Logger +} + +// ProvidePrometheusRegisterer provides a prometheus.Registerer for metrics registration. +// Uses the default registerer which is also used by the prometheus HTTP handler. 
+func ProvidePrometheusRegisterer() promlib.Registerer { + return promlib.DefaultRegisterer } // ProvideMetricsFactory provides a metrics factory based on configuration. @@ -53,12 +61,13 @@ func ProvideMetricsFactory(in MetricsFactoryInput) metrics.Factory { // If we have any constant labels, create factory with labels if len(constLabels) > 0 { return prometheus.NewFactoryWithLabels( + in.Registerer, in.Config.Telemetry.ServiceName, constLabels, ) } - return prometheus.NewFactory(in.Config.Telemetry.ServiceName) + return prometheus.NewFactory(in.Registerer, in.Config.Telemetry.ServiceName) } // buildConstLabels builds constant labels from telemetry config. diff --git a/node-core/components/metrics_providers.go b/node-core/components/metrics_providers.go index 22f25793fb..09e39c04f6 100644 --- a/node-core/components/metrics_providers.go +++ b/node-core/components/metrics_providers.go @@ -97,7 +97,8 @@ func ProvideExecutionEngineMetrics(in ExecutionEngineMetricsInput) *engine.Metri // in component lists (defaults.go, components.go, etc.). func AllMetricsProviders() []any { return []any{ - ProvideMetricsFactory, // Must be first - creates factory used by all others + ProvidePrometheusRegisterer, // Must be first - creates registerer used by factory + ProvideMetricsFactory, // Must be second - creates factory used by all others ProvideStateDBMetrics, ProvideValidatorMetrics, ProvideExecutionClientMetrics, diff --git a/observability/metrics/prometheus/prometheus.go b/observability/metrics/prometheus/prometheus.go index dac74a205b..81e4354174 100644 --- a/observability/metrics/prometheus/prometheus.go +++ b/observability/metrics/prometheus/prometheus.go @@ -26,69 +26,80 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -// Factory creates Prometheus metrics and registers them with prometheus.DefaultRegisterer. +// Factory creates Prometheus metrics and registers them with the provided registerer. 
type Factory struct { namespace string constLabels prometheus.Labels + registerer prometheus.Registerer } -// NewFactory creates a new Prometheus metrics factory with the given namespace. -func NewFactory(namespace string) metrics.Factory { +// NewFactory creates a new Prometheus metrics factory with the given registerer and namespace. +func NewFactory(registerer prometheus.Registerer, namespace string) metrics.Factory { return &Factory{ namespace: namespace, constLabels: nil, + registerer: registerer, } } -// NewFactoryWithLabels creates a new Prometheus metrics factory with constant labels. +// NewFactoryWithLabels creates a new Prometheus metrics factory with registerer, namespace, and constant labels. // Constant labels are applied to all metrics created by this factory. -func NewFactoryWithLabels(namespace string, constLabels prometheus.Labels) metrics.Factory { +func NewFactoryWithLabels(registerer prometheus.Registerer, namespace string, constLabels prometheus.Labels) metrics.Factory { return &Factory{ namespace: namespace, constLabels: constLabels, + registerer: registerer, } } -// NewCounter creates a new Counter that registers with prometheus.DefaultRegisterer. +// NewCounter creates a new Counter that registers with the factory's registerer. func (f *Factory) NewCounter(opts metrics.CounterOpts, labelNames []string) metrics.Counter { - return NewCounter(prometheus.CounterOpts{ + cv := prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: f.namespace, Name: opts.Name, Help: opts.Help, ConstLabels: f.constLabels, }, labelNames) + f.registerer.MustRegister(cv) + return &counter{cv: cv} } -// NewGauge creates a new Gauge that registers with prometheus.DefaultRegisterer. +// NewGauge creates a new Gauge that registers with the factory's registerer. 
func (f *Factory) NewGauge(opts metrics.GaugeOpts, labelNames []string) metrics.Gauge { - return NewGauge(prometheus.GaugeOpts{ + gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: f.namespace, Name: opts.Name, Help: opts.Help, ConstLabels: f.constLabels, }, labelNames) + f.registerer.MustRegister(gv) + return &gauge{gv: gv} } -// NewHistogram creates a new Histogram that registers with prometheus.DefaultRegisterer. +// NewHistogram creates a new Histogram that registers with the factory's registerer. func (f *Factory) NewHistogram(opts metrics.HistogramOpts, labelNames []string) metrics.Histogram { - return NewHistogram(prometheus.HistogramOpts{ + hv := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: f.namespace, Name: opts.Name, Help: opts.Help, Buckets: opts.Buckets, ConstLabels: f.constLabels, }, labelNames) + f.registerer.MustRegister(hv) + return &histogram{hv: hv} } -// NewSummary creates a new Summary that registers with prometheus.DefaultRegisterer. +// NewSummary creates a new Summary that registers with the factory's registerer. func (f *Factory) NewSummary(opts metrics.SummaryOpts, labelNames []string) metrics.Summary { - return NewSummary(prometheus.SummaryOpts{ + sv := prometheus.NewSummaryVec(prometheus.SummaryOpts{ Namespace: f.namespace, Name: opts.Name, Help: opts.Help, Objectives: opts.Objectives, ConstLabels: f.constLabels, }, labelNames) + f.registerer.MustRegister(sv) + return &summary{sv: sv} } // counter wraps a prometheus.CounterVec and implements the metrics.Counter interface. @@ -97,10 +108,10 @@ type counter struct { lvs lv.LabelValues } -// NewCounter creates a new Counter that registers with prometheus.DefaultRegisterer. -func NewCounter(opts prometheus.CounterOpts, labelNames []string) metrics.Counter { +// NewCounter creates a new Counter that registers with the provided registerer. 
+func NewCounter(registerer prometheus.Registerer, opts prometheus.CounterOpts, labelNames []string) metrics.Counter { cv := prometheus.NewCounterVec(opts, labelNames) - prometheus.MustRegister(cv) + registerer.MustRegister(cv) return &counter{cv: cv} } @@ -132,10 +143,10 @@ type gauge struct { lvs lv.LabelValues } -// NewGauge creates a new Gauge that registers with prometheus.DefaultRegisterer. -func NewGauge(opts prometheus.GaugeOpts, labelNames []string) metrics.Gauge { +// NewGauge creates a new Gauge that registers with the provided registerer. +func NewGauge(registerer prometheus.Registerer, opts prometheus.GaugeOpts, labelNames []string) metrics.Gauge { gv := prometheus.NewGaugeVec(opts, labelNames) - prometheus.MustRegister(gv) + registerer.MustRegister(gv) return &gauge{gv: gv} } @@ -172,10 +183,10 @@ type histogram struct { lvs lv.LabelValues } -// NewHistogram creates a new Histogram that registers with prometheus.DefaultRegisterer. -func NewHistogram(opts prometheus.HistogramOpts, labelNames []string) metrics.Histogram { +// NewHistogram creates a new Histogram that registers with the provided registerer. +func NewHistogram(registerer prometheus.Registerer, opts prometheus.HistogramOpts, labelNames []string) metrics.Histogram { hv := prometheus.NewHistogramVec(opts, labelNames) - prometheus.MustRegister(hv) + registerer.MustRegister(hv) return &histogram{hv: hv} } @@ -207,10 +218,10 @@ type summary struct { lvs lv.LabelValues } -// NewSummary creates a new Summary that registers with prometheus.DefaultRegisterer. -func NewSummary(opts prometheus.SummaryOpts, labelNames []string) metrics.Summary { +// NewSummary creates a new Summary that registers with the provided registerer. +func NewSummary(registerer prometheus.Registerer, opts prometheus.SummaryOpts, labelNames []string) metrics.Summary { sv := prometheus.NewSummaryVec(opts, labelNames) - prometheus.MustRegister(sv) + registerer.MustRegister(sv) return &summary{sv: sv} }