Skip to content

Improve Disk Monitoring Logic #7421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: unstable
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions beacon_node/http_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4231,9 +4231,10 @@ pub fn serve<T: BeaconChainTypes>(
.and(warp::path("health"))
.and(warp::path::end())
.and(task_spawner_filter.clone())
.then(|task_spawner: TaskSpawner<T::EthSpec>| {
.and(data_dir_filter.clone())
.then(|task_spawner: TaskSpawner<T::EthSpec>, data_dir: PathBuf| {
task_spawner.blocking_json_task(Priority::P0, move || {
eth2::lighthouse::Health::observe()
eth2::lighthouse::Health::observe_with_path(data_dir)
.map(api_types::GenericResponse::from)
.map_err(warp_utils::reject::custom_bad_request)
})
Expand Down
1 change: 1 addition & 0 deletions beacon_node/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ pub fn get_config<E: EthSpec>(
freezer_db_path: None,
update_period_secs,
monitoring_endpoint: monitoring_endpoint.to_string(),
monitoring_dir: client_config.data_dir().clone()
});
}

Expand Down
3 changes: 2 additions & 1 deletion beacon_node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl<E: EthSpec> ProductionBeaconNode<E> {
let spec = context.eth2_config().spec.clone();
let client_genesis = client_config.genesis.clone();
let store_config = client_config.store.clone();
let _datadir = client_config.create_data_dir()?;
let datadir = client_config.create_data_dir()?;
let db_path = client_config.create_db_path()?;
let freezer_db_path = client_config.create_freezer_db_path()?;
let blobs_db_path = client_config.create_blobs_db_path()?;
Expand Down Expand Up @@ -122,6 +122,7 @@ impl<E: EthSpec> ProductionBeaconNode<E> {
};

let builder = if let Some(monitoring_config) = &mut client_config.monitoring_api {
monitoring_config.monitoring_dir = datadir;
monitoring_config.db_path = Some(db_path);
monitoring_config.freezer_db_path = Some(freezer_db_path);
builder.monitoring_client(monitoring_config)?
Expand Down
25 changes: 15 additions & 10 deletions common/health_metrics/src/observe.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::path::Path;
use eth2::lighthouse::{Health, ProcessHealth, SystemHealth};

#[cfg(target_os = "linux")]
Expand All @@ -7,32 +8,36 @@ use {
};

pub trait Observe: Sized {
fn observe() -> Result<Self, String>;
fn observe_with_path<P>(datadir: P) -> Result<Self, String> where P: AsRef<Path>;

fn observe() -> Result<Self, String> {
Self::observe_with_path("/")
}
}

impl Observe for Health {
#[cfg(not(target_os = "linux"))]
fn observe() -> Result<Self, String> {
fn observe_with_path<P>(_datadir: P) -> Result<Self, String> where P: AsRef<Path> {
Err("Health is only available on Linux".into())
}

#[cfg(target_os = "linux")]
fn observe() -> Result<Self, String> {
fn observe_with_path<P>(datadir: P) -> Result<Self, String> where P: AsRef<Path> {
Ok(Self {
process: ProcessHealth::observe()?,
system: SystemHealth::observe()?,
process: ProcessHealth::observe_with_path(&datadir)?,
system: SystemHealth::observe_with_path(&datadir)?,
})
}
}

impl Observe for SystemHealth {
#[cfg(not(target_os = "linux"))]
fn observe() -> Result<Self, String> {
fn observe_with_path<P>(_datadir: P) -> Result<Self, String> where P: AsRef<Path> {
Err("Health is only available on Linux".into())
}

#[cfg(target_os = "linux")]
fn observe() -> Result<Self, String> {
fn observe_with_path<P>(datadir: P) -> Result<Self, String> where P: AsRef<Path> {
let vm = psutil::memory::virtual_memory()
.map_err(|e| format!("Unable to get virtual memory: {:?}", e))?;
let loadavg =
Expand All @@ -41,7 +46,7 @@ impl Observe for SystemHealth {
let cpu =
psutil::cpu::cpu_times().map_err(|e| format!("Unable to get cpu times: {:?}", e))?;

let disk_usage = psutil::disk::disk_usage("/")
let disk_usage = psutil::disk::disk_usage(&datadir)
.map_err(|e| format!("Unable to disk usage info: {:?}", e))?;

let disk = psutil::disk::DiskIoCountersCollector::default()
Expand Down Expand Up @@ -90,12 +95,12 @@ impl Observe for SystemHealth {

impl Observe for ProcessHealth {
#[cfg(not(target_os = "linux"))]
fn observe() -> Result<Self, String> {
fn observe_with_path<P>(_datadir: P) -> Result<Self, String> where P: AsRef<Path> {
Err("Health is only available on Linux".into())
}

#[cfg(target_os = "linux")]
fn observe() -> Result<Self, String> {
fn observe_with_path<P>(_datadir: P) -> Result<Self, String> where P: AsRef<Path> {
let process =
Process::current().map_err(|e| format!("Unable to get current process: {:?}", e))?;

Expand Down
6 changes: 5 additions & 1 deletion common/monitoring_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ impl std::fmt::Display for Error {
pub struct Config {
/// Endpoint
pub monitoring_endpoint: String,
/// Path for the data directory
pub monitoring_dir: PathBuf,
/// Path for the hot database required for fetching beacon db size metrics.
/// Note: not relevant for validator and system metrics.
pub db_path: Option<PathBuf>,
Expand All @@ -63,6 +65,7 @@ pub struct Config {
#[derive(Clone)]
pub struct MonitoringHttpClient {
client: reqwest::Client,
monitoring_dir: PathBuf,
/// Path to the hot database. Required for getting db size metrics
db_path: Option<PathBuf>,
/// Path to the freezer database.
Expand All @@ -75,6 +78,7 @@ impl MonitoringHttpClient {
pub fn new(config: &Config) -> Result<Self, String> {
Ok(Self {
client: reqwest::Client::new(),
monitoring_dir: config.monitoring_dir.clone(),
db_path: config.db_path.clone(),
freezer_db_path: config.freezer_db_path.clone(),
update_period: Duration::from_secs(
Expand Down Expand Up @@ -159,7 +163,7 @@ impl MonitoringHttpClient {

/// Gets system metrics by observing capturing the SystemHealth metrics.
pub fn get_system_metrics(&self) -> Result<MonitoringMetrics, Error> {
let system_health = SystemHealth::observe().map_err(Error::SystemMetricsFailed)?;
let system_health = SystemHealth::observe_with_path(&self.monitoring_dir).map_err(Error::SystemMetricsFailed)?;
Ok(MonitoringMetrics {
metadata: Metadata::new(ProcessType::System),
process_metrics: Process::System(system_health.into()),
Expand Down
15 changes: 10 additions & 5 deletions validator_client/http_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ pub struct Context<T: SlotClock, E> {
#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)]
pub struct Config {
pub enabled: bool,
pub monitoring_dir: PathBuf,
pub listen_addr: IpAddr,
pub listen_port: u16,
pub allow_origin: Option<String>,
Expand All @@ -107,14 +108,17 @@ pub struct Config {
impl Default for Config {
fn default() -> Self {
// This value is always overridden when building config from CLI.
let http_token_path = dirs::home_dir()
let validator_dir = dirs::home_dir()
.unwrap_or_else(|| PathBuf::from("."))
.join(DEFAULT_ROOT_DIR)
.join(DEFAULT_HARDCODED_NETWORK)
.join(DEFAULT_VALIDATOR_DIR)
.join(PK_FILENAME);
.join(DEFAULT_VALIDATOR_DIR);

let http_token_path = validator_dir.join(PK_FILENAME);

Self {
enabled: false,
monitoring_dir: validator_dir,
listen_addr: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
listen_port: 5062,
allow_origin: None,
Expand Down Expand Up @@ -294,9 +298,10 @@ pub fn serve<T: 'static + SlotClock + Clone, E: EthSpec>(
let get_lighthouse_health = warp::path("lighthouse")
.and(warp::path("health"))
.and(warp::path::end())
.then(|| {
.and(validator_dir_filter.clone())
.then(|validator_dir: PathBuf| {
blocking_json_task(move || {
eth2::lighthouse::Health::observe()
eth2::lighthouse::Health::observe_with_path(validator_dir)
.map(api_types::GenericResponse::from)
.map_err(warp_utils::reject::custom_bad_request)
})
Expand Down
1 change: 1 addition & 0 deletions validator_client/http_api/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ impl ApiTester {
pub fn default_http_config() -> HttpConfig {
HttpConfig {
enabled: true,
monitoring_dir: tempdir().unwrap().path().to_path_buf(),
listen_addr: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
listen_port: 0,
allow_origin: None,
Expand Down
1 change: 1 addition & 0 deletions validator_client/http_api/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ impl ApiTester {
allow_origin: None,
allow_keystore_export: true,
store_passwords_in_secrets_dir: false,
monitoring_dir: validator_dir.path().into(),
http_token_path: token_path,
},
sse_logging_components: None,
Expand Down
5 changes: 4 additions & 1 deletion validator_client/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,10 @@ impl Config {
.map(|home| home.join(DEFAULT_ROOT_DIR))
.unwrap_or_else(|| PathBuf::from("."));

let (mut validator_dir, mut secrets_dir) = (None, None);
let (mut validator_dir, mut secrets_dir, mut data_dir) = (None, None, None);
if cli_args.get_one::<String>("datadir").is_some() {
let base_dir: PathBuf = parse_required(cli_args, "datadir")?;
data_dir = Some(base_dir.clone());
validator_dir = Some(base_dir.join(DEFAULT_VALIDATOR_DIR));
secrets_dir = Some(base_dir.join(DEFAULT_SECRET_DIR));
}
Expand Down Expand Up @@ -275,6 +276,7 @@ impl Config {
*/

config.http_api.enabled = validator_client_config.http;
config.http_api.monitoring_dir = data_dir.unwrap_or(config.validator_dir.clone());

if let Some(address) = &validator_client_config.http_address {
if validator_client_config.unencrypted_http_transport {
Expand Down Expand Up @@ -346,6 +348,7 @@ impl Config {
if let Some(monitoring_endpoint) = validator_client_config.monitoring_endpoint.as_ref() {
let update_period_secs = Some(validator_client_config.monitoring_endpoint_period);
config.monitoring_api = Some(monitoring_api::Config {
monitoring_dir: config.http_api.monitoring_dir.clone(),
db_path: None,
freezer_db_path: None,
update_period_secs,
Expand Down