Skip to content

Commit d0078df

Browse files
authored
refactor(api): Add more components to healthcheck – follow-ups (#3337)
## What ❔

Various minor follow-ups after #3193:

- Rework app-level health details.
- Fix `execution_time` unit of measurement for the database health check details.
- Rework the database health check: do not hold a DB connection all the time; make it reactive.

## Why ❔

Makes the dependency graph lighter; simplifies maintenance.

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog entries from PRs).
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`.
1 parent cf458a0 commit d0078df

File tree

17 files changed

+111
-153
lines changed

17 files changed

+111
-153
lines changed

Cargo.lock

Lines changed: 1 addition & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ members = [
7979
# Test infrastructure
8080
"core/tests/loadnext",
8181
"core/tests/vm-benchmark",
82-
"core/lib/bin_metadata",
8382
]
8483
resolver = "2"
8584

core/bin/external_node/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ zksync_health_check.workspace = true
2929
zksync_web3_decl.workspace = true
3030
zksync_types.workspace = true
3131
zksync_block_reverter.workspace = true
32-
zksync_shared_metrics.workspace = true
3332
zksync_node_genesis.workspace = true
3433
zksync_node_fee_model.workspace = true
3534
zksync_node_db_pruner.workspace = true

core/bin/external_node/src/metrics/framework.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use zksync_node_framework::{
55
implementations::resources::pools::{MasterPool, PoolResource},
66
FromContext, IntoContext, StopReceiver, Task, TaskId, WiringError, WiringLayer,
77
};
8-
use zksync_shared_metrics::{GIT_METRICS, RUST_METRICS};
98
use zksync_types::{L1ChainId, L2ChainId, SLChainId};
109

1110
use super::EN_METRICS;
@@ -39,8 +38,6 @@ impl WiringLayer for ExternalNodeMetricsLayer {
3938
}
4039

4140
async fn wire(self, input: Self::Input) -> Result<Self::Output, WiringError> {
42-
RUST_METRICS.initialize();
43-
GIT_METRICS.initialize();
4441
EN_METRICS.observe_config(
4542
self.l1_chain_id,
4643
self.sl_chain_id,

core/lib/bin_metadata/Cargo.toml

Lines changed: 0 additions & 18 deletions
This file was deleted.

core/lib/dal/src/system_dal.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::{collections::HashMap, time::Duration};
22

3-
use chrono::DateTime;
3+
use chrono::{DateTime, Utc};
44
use serde::{Deserialize, Serialize};
55
use zksync_db_connection::{connection::Connection, error::DalResult, instrument::InstrumentExt};
66

@@ -14,11 +14,11 @@ pub(crate) struct TableSize {
1414
pub total_size: u64,
1515
}
1616

17-
#[derive(Debug, Serialize, Deserialize)]
17+
#[derive(Debug, Clone, Serialize, Deserialize)]
1818
pub struct DatabaseMigration {
1919
pub version: i64,
2020
pub description: String,
21-
pub installed_on: DateTime<chrono::Utc>,
21+
pub installed_on: DateTime<Utc>,
2222
pub success: bool,
2323
pub checksum: String,
2424
pub execution_time: Duration,
@@ -118,7 +118,7 @@ impl SystemDal<'_, '_> {
118118
installed_on: row.installed_on,
119119
success: row.success,
120120
checksum: hex::encode(row.checksum),
121-
execution_time: Duration::from_millis(u64::try_from(row.execution_time).unwrap_or(0)),
121+
execution_time: Duration::from_nanos(u64::try_from(row.execution_time).unwrap_or(0)),
122122
})
123123
}
124124
}

core/lib/health_check/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ serde_json.workspace = true
2020
thiserror.workspace = true
2121
tokio = { workspace = true, features = ["sync", "time"] }
2222
tracing.workspace = true
23-
zksync_bin_metadata.workspace = true
2423

2524
[dev-dependencies]
2625
assert_matches.workspace = true

core/lib/health_check/src/binary.rs

Lines changed: 0 additions & 21 deletions
This file was deleted.

core/lib/health_check/src/lib.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,9 @@ pub use async_trait::async_trait;
1111
use futures::future;
1212
use serde::Serialize;
1313
use tokio::sync::watch;
14-
use zksync_bin_metadata::BIN_METADATA;
1514

16-
use self::metrics::{CheckResult, METRICS};
17-
use crate::metrics::AppHealthCheckConfig;
15+
use crate::metrics::{AppHealthCheckConfig, CheckResult, METRICS};
1816

19-
mod binary;
2017
mod metrics;
2118

2219
#[cfg(test)]
@@ -114,6 +111,8 @@ pub struct AppHealthCheck {
114111

115112
#[derive(Debug, Clone)]
116113
struct AppHealthCheckInner {
114+
/// Application-level health details.
115+
app_details: Option<serde_json::Value>,
117116
components: Vec<Arc<dyn CheckHealth>>,
118117
slow_time_limit: Duration,
119118
hard_time_limit: Duration,
@@ -136,6 +135,7 @@ impl AppHealthCheck {
136135

137136
let inner = AppHealthCheckInner {
138137
components: Vec::default(),
138+
app_details: None,
139139
slow_time_limit,
140140
hard_time_limit,
141141
};
@@ -181,6 +181,13 @@ impl AppHealthCheck {
181181
}
182182
}
183183

184+
/// Sets app-level health details. They can include build info etc.
185+
pub fn set_details(&self, details: impl Serialize) {
186+
let details = serde_json::to_value(details).expect("failed serializing app details");
187+
let mut inner = self.inner.lock().expect("`AppHealthCheck` is poisoned");
188+
inner.app_details = Some(details);
189+
}
190+
184191
/// Inserts health check for a component.
185192
///
186193
/// # Errors
@@ -220,6 +227,7 @@ impl AppHealthCheck {
220227
// Clone `inner` so that we don't hold a lock for them across a wait point.
221228
let AppHealthCheckInner {
222229
components,
230+
app_details,
223231
slow_time_limit,
224232
hard_time_limit,
225233
} = self
@@ -238,7 +246,8 @@ impl AppHealthCheck {
238246
.map(|health| health.status)
239247
.max_by_key(|status| status.priority_for_aggregation())
240248
.unwrap_or(HealthStatus::Ready);
241-
let inner = Health::with_details(aggregated_status.into(), BIN_METADATA);
249+
let mut inner = Health::from(aggregated_status);
250+
inner.details = app_details.clone();
242251

243252
let health = AppHealth { inner, components };
244253
if !health.inner.status.is_healthy() {

core/lib/health_check/src/tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ async fn aggregating_health_checks() {
8282
let (first_check, first_updater) = ReactiveHealthCheck::new("first");
8383
let (second_check, second_updater) = ReactiveHealthCheck::new("second");
8484
let inner = AppHealthCheckInner {
85+
app_details: None,
8586
components: vec![Arc::new(first_check), Arc::new(second_check)],
8687
slow_time_limit: AppHealthCheck::DEFAULT_SLOW_TIME_LIMIT,
8788
hard_time_limit: AppHealthCheck::DEFAULT_HARD_TIME_LIMIT,

0 commit comments

Comments
 (0)