From 2c4f9f57fdc2f67976622af8d201774f3bdf8f73 Mon Sep 17 00:00:00 2001 From: arnav-makkar <142909930+arnav-makkar@users.noreply.github.com> Date: Thu, 20 Mar 2025 19:57:03 +0530 Subject: [PATCH 1/2] added logs for low disk space --- server.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server.go b/server.go index a848340d55..b3c94b8c03 100644 --- a/server.go +++ b/server.go @@ -2022,6 +2022,11 @@ func (s *server) createLivenessMonitor(cfg *Config, cc *chainreg.ChainControl, return nil } + // If disk space is critically low (less than 5%), use critical logging + if free < 0.05 { + srvrLog.Criticalf("CRITICAL: Disk space critically low: %.1f%% free space remaining", free*100) + } + return fmt.Errorf("require: %v free space, got: %v", cfg.HealthChecks.DiskCheck.RequiredRemaining, free) From acffe14c0d59b3cc436b535828a1b45f0d7c5a40 Mon Sep 17 00:00:00 2001 From: arnav-makkar <142909930+arnav-makkar@users.noreply.github.com> Date: Fri, 21 Mar 2025 00:01:57 +0530 Subject: [PATCH 2/2] added file descriptor check, changed criticalf to errorf --- healthcheck/fdcheck.go | 26 ++++++++++++++++++++++++++ lncfg/healthcheck.go | 6 ++++++ server.go | 27 +++++++++++++++++++++++---- 3 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 healthcheck/fdcheck.go diff --git a/healthcheck/fdcheck.go b/healthcheck/fdcheck.go new file mode 100644 index 0000000000..29ab50b2c5 --- /dev/null +++ b/healthcheck/fdcheck.go @@ -0,0 +1,26 @@ +package healthcheck + +import ( + "errors" + "fmt" + "os" + "syscall" +) + +// CheckFileDescriptors checks if there are any free file descriptors available. +// It returns an error if no free file descriptors are available or if an unexpected error occurs. 
+func CheckFileDescriptors() error {
+	// Opening the null device consumes exactly one descriptor, so the
+	// open fails with EMFILE or ENFILE when none can be allocated.
+	fd, err := os.OpenFile(os.DevNull, os.O_RDONLY, 0)
+	if err != nil {
+		if errors.Is(err, syscall.EMFILE) || errors.Is(err, syscall.ENFILE) {
+			return fmt.Errorf("no free file descriptors available: %w", err)
+		}
+
+		return fmt.Errorf("error checking file descriptors: %w", err)
+	}
+
+	// Release the probe descriptor immediately; report a failed close.
+	return fd.Close()
+}
diff --git a/lncfg/healthcheck.go b/lncfg/healthcheck.go
index 7904eaceaa..3a16446b75 100644
--- a/lncfg/healthcheck.go
+++ b/lncfg/healthcheck.go
@@ -29,6 +29,8 @@ type HealthCheckConfig struct {
 
 	DiskCheck *DiskCheckConfig `group:"diskspace" namespace:"diskspace"`
 
+	FileDescriptorCheck *DiskCheckConfig `group:"file_descriptor" namespace:"file_descriptor"`
+
 	TLSCheck *CheckConfig `group:"tls" namespace:"tls"`
 
 	TorConnection *CheckConfig `group:"torconnection" namespace:"torconnection"`
@@ -48,6 +50,10 @@ func (h *HealthCheckConfig) Validate() error {
 		return err
 	}
 
+	if err := h.FileDescriptorCheck.validate("file descriptor"); err != nil {
+		return err
+	}
+
 	if err := h.TLSCheck.validate("tls"); err != nil {
 		return err
 	}
diff --git a/server.go b/server.go
index b3c94b8c03..79b6b6e734 100644
--- a/server.go
+++ b/server.go
@@ -2022,9 +2022,12 @@ func (s *server) createLivenessMonitor(cfg *Config, cc *chainreg.ChainControl,
 				return nil
 			}
 
-			// If disk space is critically low (less than 5%), use critical logging
-			if free < 0.05 {
-				srvrLog.Criticalf("CRITICAL: Disk space critically low: %.1f%% free space remaining", free*100)
+			// Define the critical threshold (e.g., 5%)
+			const criticalThreshold = 0.05
+
+			// If free space is lesser than critical value, log a warning
+			if free < criticalThreshold {
+				srvrLog.Errorf("Disk space low: %.1f%% free space remaining", free*100)
 			}
 
 			return fmt.Errorf("require: %v free space, got: %v",
@@ -2037,6 +2040,22 @@ func (s *server) createLivenessMonitor(cfg *Config, cc *chainreg.ChainControl,
cfg.HealthChecks.DiskCheck.Attempts, ) + // Add file descriptor check + fdCheck := healthcheck.NewObservation( + "file descriptors", + func() error { + if err := healthcheck.CheckFileDescriptors(); err != nil { + srvrLog.Criticalf("CRITICAL: No free file descriptors available: %v", err) + return err + } + return nil + }, + cfg.HealthChecks.FileDescriptorCheck.Interval, + cfg.HealthChecks.FileDescriptorCheck.Timeout, + cfg.HealthChecks.FileDescriptorCheck.Backoff, + cfg.HealthChecks.FileDescriptorCheck.Attempts, + ) + tlsHealthCheck := healthcheck.NewObservation( "tls", func() error { @@ -2062,7 +2081,7 @@ func (s *server) createLivenessMonitor(cfg *Config, cc *chainreg.ChainControl, ) checks := []*healthcheck.Observation{ - chainHealthCheck, diskCheck, tlsHealthCheck, + chainHealthCheck, diskCheck, fdCheck, tlsHealthCheck, } // If Tor is enabled, add the healthcheck for tor connection.