Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,7 @@ features = [ "test-helpers" ]
[target."cfg(target_family = \"unix\")".dependencies.nix]
version = "0.30"
default-features = false
features = [ "resource" ]
features = [ "fs", "resource" ]

[target.'cfg(unix)'.dependencies]
rlimit = "0.11"
4 changes: 4 additions & 0 deletions cli/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,10 @@ impl Start {
// Error messages.
let node_parse_error = || "Failed to start node";

// Periodically check that the process is not running low on file descriptors.
#[cfg(unix)]
crate::helpers::spawn_fd_monitor();

// Clone the configurations.
let mut self_ = self.clone();

Expand Down
195 changes: 195 additions & 0 deletions cli/src/helpers/fd_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// Copyright (c) 2019-2026 Provable Inc.
// This file is part of the snarkOS library.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at:

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::io;

use tokio::time::{Duration, MissedTickBehavior, interval};
use tracing::*;

/// Snapshot of the file descriptors held by this process, paired with the
/// soft limit that was in force when the snapshot was taken.
#[derive(Debug, Clone, Copy)]
pub struct FdUsage {
    /// Number of descriptors the process currently has open.
    pub open: u64,
    /// Soft `RLIMIT_NOFILE` value; `None` stands for "unlimited".
    pub soft_limit: Option<u64>,
}

impl FdUsage {
    /// Returns `open / soft_limit` as a fraction.
    ///
    /// Evaluates to `0.0` when there is no limit or the limit is zero. The
    /// value is not clamped, so it exceeds `1.0` whenever `open` is larger
    /// than the limit.
    pub fn ratio(&self) -> f64 {
        self.soft_limit
            .filter(|&cap| cap > 0)
            .map_or(0.0, |cap| self.open as f64 / cap as f64)
    }

    /// Reports whether usage has reached `threshold` (a fraction of the soft
    /// limit, e.g. `0.8` for 80%). Always `false` when the limit is unlimited.
    pub fn approaching_limit(&self, threshold: f64) -> bool {
        match self.soft_limit {
            Some(_) => self.ratio() >= threshold,
            None => false,
        }
    }
}

/// Samples the running process: reads the soft `RLIMIT_NOFILE` first, then
/// counts the descriptors that are open right now.
///
/// # Errors
///
/// Propagates any I/O error raised while querying the limit or while
/// counting descriptors.
pub fn fd_usage() -> io::Result<FdUsage> {
    soft_nofile_limit()
        .and_then(|soft_limit| count_open_fds(soft_limit).map(|open| FdUsage { open, soft_limit }))
}

/// Reads the soft `RLIMIT_NOFILE` of the current process.
///
/// Returns `Ok(None)` when the soft limit equals `rlimit::INFINITY`;
/// the hard limit is ignored.
fn soft_nofile_limit() -> io::Result<Option<u64>> {
    rlimit::Resource::NOFILE.get().map(|(soft, _)| (soft != rlimit::INFINITY).then_some(soft))
}

/// Counts this process's open descriptors by listing `/proc/self/fd`.
///
/// The `_limit` argument is unused on Linux; it exists so the signature
/// matches the non-Linux implementation.
///
/// # Errors
///
/// Fails if the directory cannot be opened or any entry cannot be read.
#[cfg(target_os = "linux")]
fn count_open_fds(_limit: Option<u64>) -> io::Result<u64> {
    // Every entry in the directory is one open descriptor; bail out on the
    // first entry that fails to read.
    let listed = std::fs::read_dir("/proc/self/fd")?
        .try_fold(0u64, |seen, entry| entry.map(|_| seen + 1))?;

    // The directory handle used for the listing shows up in the listing
    // itself, so discount it.
    Ok(listed.saturating_sub(1))
}

/// Counts open descriptors on non-Linux Unix systems (macOS, the BSDs, ...)
/// without relying on `/proc`.
///
/// Every descriptor number below the limit is probed with
/// `fcntl(F_GETFD)`; a successful call means the slot is in use. When the
/// limit is unlimited, the first 65,536 slots are probed. The cost is one
/// syscall per probed slot.
#[cfg(all(unix, not(target_os = "linux")))]
fn count_open_fds(limit: Option<u64>) -> io::Result<u64> {
    use nix::fcntl::{FcntlArg, fcntl};

    // Clamp to the descriptor number range before narrowing to `i32`.
    let ceiling = limit.unwrap_or(65_536).min(i32::MAX as u64) as i32;

    let mut live: u64 = 0;
    for fd in 0..ceiling {
        if fcntl(fd, FcntlArg::F_GETFD).is_ok() {
            live += 1;
        }
    }
    Ok(live)
}

/// Machine-wide file-descriptor accounting, as opposed to a single process.
#[derive(Debug, Clone, Copy)]
pub struct SystemFd {
    /// Descriptors currently allocated across the whole system.
    pub allocated: u64,
    /// System-wide maximum number of descriptors.
    pub max: u64,
}

impl SystemFd {
    /// Returns `allocated / max` as a fraction, or `0.0` when `max` is zero.
    /// The result is not clamped.
    pub fn ratio(&self) -> f64 {
        match self.max {
            0 => 0.0,
            cap => self.allocated as f64 / cap as f64,
        }
    }
}

/// Reads machine-wide descriptor usage from `/proc/sys/fs/file-nr`.
///
/// The file holds three whitespace-separated columns:
/// `<allocated> <free> <max>`. Only the first and third are used.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be read, or
/// `InvalidData` if the first or third column is missing or not a number.
#[cfg(target_os = "linux")]
pub fn system_fd_usage() -> std::io::Result<SystemFd> {
    let contents = std::fs::read_to_string("/proc/sys/fs/file-nr")?;
    let mut columns = contents.split_whitespace();

    // Skips `skip` columns, then parses the next one as an integer.
    let mut column_after = |skip: usize| -> std::io::Result<u64> {
        columns
            .nth(skip)
            .and_then(|token| token.parse::<u64>().ok())
            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidData, "unexpected file-nr format"))
    };

    let allocated = column_after(0)?;
    // Step over the "free" column (always 0 on modern kernels) to reach "max".
    let max = column_after(1)?;

    Ok(SystemFd { allocated, max })
}

/// Reads machine-wide descriptor usage on non-Linux Unix systems by asking
/// the `sysctl` command-line tool for the current and maximum open-file
/// counts.
///
/// The sysctl OID names differ per OS flavor; FreeBSD, macOS, OpenBSD and
/// NetBSD are covered. On every other target the function still compiles
/// and reports `ErrorKind::Unsupported` at runtime.
///
/// # Errors
///
/// * `Unsupported` - no OID names are known for the target OS.
/// * `NotFound` - `sysctl` ran but exited unsuccessfully for an OID.
/// * `InvalidData` - `sysctl` printed something that is not an integer.
/// * Any I/O error raised while spawning `sysctl`.
#[cfg(all(unix, not(target_os = "linux")))]
pub fn system_fd_usage() -> std::io::Result<SystemFd> {
    // (current, maximum) OID names; values are plain integers. Exactly one
    // of these definitions is compiled in, so the code below type-checks on
    // every target, including the ones without a known OID pair.
    #[cfg(target_os = "freebsd")]
    const OIDS: Option<(&str, &str)> = Some(("kern.openfiles", "kern.maxfiles"));
    #[cfg(target_os = "macos")]
    const OIDS: Option<(&str, &str)> = Some(("kern.num_files", "kern.maxfiles"));
    #[cfg(any(target_os = "openbsd", target_os = "netbsd"))]
    const OIDS: Option<(&str, &str)> = Some(("kern.nfiles", "kern.maxfiles"));
    #[cfg(not(any(target_os = "freebsd", target_os = "macos", target_os = "openbsd", target_os = "netbsd")))]
    const OIDS: Option<(&str, &str)> = None;

    // Runs `sysctl -n <oid>` and parses its trimmed stdout as an integer.
    fn read(oid: &str) -> std::io::Result<u64> {
        let out = std::process::Command::new("sysctl").arg("-n").arg(oid).output()?;
        if !out.status.success() {
            return Err(std::io::Error::new(std::io::ErrorKind::NotFound, format!("sysctl {oid} unavailable")));
        }
        String::from_utf8_lossy(&out.stdout)
            .trim()
            .parse()
            .map_err(|_| std::io::Error::new(std::io::ErrorKind::InvalidData, format!("bad value for {oid}")))
    }

    let (cur_oid, max_oid) = OIDS.ok_or_else(|| {
        std::io::Error::new(std::io::ErrorKind::Unsupported, "system fd probe unsupported on this OS")
    })?;

    Ok(SystemFd { allocated: read(cur_oid)?, max: read(max_oid)? })
}

pub fn spawn_fd_monitor() {
tokio::spawn(async move {
let mut tick = interval(Duration::from_secs(30));
tick.set_missed_tick_behavior(MissedTickBehavior::Skip);

loop {
tick.tick().await;

// (1) the node's own fds
match fd_usage() {
Ok(u) => {
if let Some(limit) = u.soft_limit {
let (pct, left) = (u.ratio() * 100.0, limit.saturating_sub(u.open));
if u.ratio() >= 0.95 {
error!(
scope = "process",
open = u.open,
limit,
left,
pct = format!("{pct:.1}%"),
"node fd usage critical"
);
} else if u.ratio() >= 0.80 {
warn!(
scope = "process",
open = u.open,
limit,
left,
pct = format!("{pct:.1}%"),
"node fd usage elevated"
);
}
}
}
Err(e) => error!(error = %e, "process fd probe failed"),
}

// (2) whole-machine fds
Comment thread
ljedrz marked this conversation as resolved.
Outdated
match system_fd_usage() {
Ok(s) => {
let (pct, left) = (s.ratio() * 100.0, s.max.saturating_sub(s.allocated));
if s.ratio() >= 0.90 {
error!(
scope = "system",
allocated = s.allocated,
max = s.max,
left,
pct = format!("{pct:.1}%"),
"system-wide fd usage critical"
);
} else if s.ratio() >= 0.75 {
warn!(
scope = "system",
allocated = s.allocated,
max = s.max,
left,
pct = format!("{pct:.1}%"),
"system-wide fd usage elevated"
);
}
}
Err(e) => error!(error = %e, "system fd probe failed"),
}
}
});
}
20 changes: 10 additions & 10 deletions cli/src/helpers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ use log_writer::*;
mod dynamic_format;
use dynamic_format::*;

#[cfg(target_family = "unix")]
mod fd_check;
#[cfg(target_family = "unix")]
pub use fd_check::*;
Comment thread
ljedrz marked this conversation as resolved.

pub(crate) mod args;

pub mod logger;
Expand All @@ -47,16 +52,11 @@ pub fn check_open_files_limit(minimum: u64) {
Ok((soft_limit, _)) => {
// Check if requirements are met.
if soft_limit < minimum {
// Warn about too low limit.
let warning = [
format!("⚠️ The open files limit ({soft_limit}) for this process is lower than recommended."),
format!(" • To ensure correct behavior of the node, please raise it to at least {minimum}."),
" • See the `ulimit` command and `/etc/security/limits.conf` for more details.".to_owned(),
]
.join("\n")
.yellow()
.bold();
eprintln!("{warning}\n");
panic!(
"The open files limit ({soft_limit}) for this process is too low. \
Please raise it to at least {minimum} \
See the `ulimit` command and `/etc/security/limits.conf` for more details.",
);
Comment thread
ljedrz marked this conversation as resolved.
}
}
Err(err) => {
Expand Down