@@ -740,7 +740,8 @@ pub async fn run_daemon(
740740 if info_path. exists ( ) {
741741 if let Ok ( existing) = std:: fs:: read_to_string ( info_path) {
742742 if let Ok ( info) = serde_json:: from_str :: < DaemonInfo > ( & existing) {
743- if is_process_alive ( info. pid ) {
743+ // PID alive AND the health endpoint responds → truly running
744+ if is_process_alive ( info. pid ) && is_daemon_responding ( & info. listen_addr ) {
744745 return Err ( format ! (
745746 "Another daemon (PID {}) is already running at {}" ,
746747 info. pid, info. listen_addr
@@ -749,7 +750,8 @@ pub async fn run_daemon(
749750 }
750751 }
751752 }
752- // Stale PID file, remove it
753+ // Stale PID file (process dead or different process reused PID), remove it
754+ info ! ( "Removing stale daemon info file" ) ;
753755 let _ = std:: fs:: remove_file ( info_path) ;
754756 }
755757
@@ -771,7 +773,22 @@ pub async fn run_daemon(
771773 info ! ( "WebChat UI available at http://{addr}/" , ) ;
772774 info ! ( "WebSocket endpoint: ws://{addr}/api/agents/{{id}}/ws" , ) ;
773775
774- let listener = tokio:: net:: TcpListener :: bind ( addr) . await ?;
776+ // Use SO_REUSEADDR to allow binding immediately after reboot (avoids TIME_WAIT).
777+ let socket = socket2:: Socket :: new (
778+ if addr. is_ipv4 ( ) {
779+ socket2:: Domain :: IPV4
780+ } else {
781+ socket2:: Domain :: IPV6
782+ } ,
783+ socket2:: Type :: STREAM ,
784+ None ,
785+ ) ?;
786+ socket. set_reuse_address ( true ) ?;
787+ socket. set_nonblocking ( true ) ?;
788+ socket. bind ( & addr. into ( ) ) ?;
789+ socket. listen ( 1024 ) ?;
790+ let listener =
791+ tokio:: net:: TcpListener :: from_std ( std:: net:: TcpListener :: from ( socket) ) ?;
775792
776793 // Run server with graceful shutdown.
777794 // SECURITY: `into_make_service_with_connect_info` injects the peer
@@ -891,3 +908,26 @@ fn is_process_alive(pid: u32) -> bool {
891908 false
892909 }
893910}
911+
/// Check whether something is accepting TCP connections at the address
/// recorded for a previously started daemon.
///
/// This complements the PID liveness check: a PID alone can yield a false
/// positive when an unrelated process has reused it (for example after a
/// reboot). Only a TCP connect is attempted, with no HTTP request, so a
/// `true` result means "a listener exists there", not "it is our daemon".
///
/// `addr` may be a bare `host:port` / `ip:port` pair or carry an `http://` or
/// `https://` scheme and a trailing path; both are ignored.
///
/// Returns `false` when the address cannot be parsed or resolved, or when no
/// resolved endpoint accepts a connection within the per-attempt timeout.
fn is_daemon_responding(addr: &str) -> bool {
    // Upper bound for each individual connect attempt; keeps startup snappy
    // even when the recorded address is unreachable.
    const CONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(500);

    let without_scheme = addr
        .strip_prefix("http://")
        .or_else(|| addr.strip_prefix("https://"))
        .unwrap_or(addr);
    // Keep only the authority part so "host:port/" or "host:port/path" still works.
    let host_port = without_scheme.split('/').next().unwrap_or(without_scheme);

    // Handles both literal socket addresses and `hostname:port`. Literal
    // addresses are parsed without any lookup; hostnames go through the
    // system resolver, which is blocking and has no timeout in std.
    let candidates = match std::net::ToSocketAddrs::to_socket_addrs(host_port) {
        Ok(resolved) => resolved,
        Err(_) => return false,
    };

    for mut candidate in candidates {
        // A daemon bound to the wildcard address (0.0.0.0 / ::) is reachable
        // through loopback; dialling the wildcard itself is not portable.
        if candidate.ip().is_unspecified() {
            let loopback = match candidate {
                std::net::SocketAddr::V4(_) => {
                    std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST)
                }
                std::net::SocketAddr::V6(_) => {
                    std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST)
                }
            };
            candidate.set_ip(loopback);
        }

        // Every attempt is bounded, unlike a plain `TcpStream::connect`,
        // which can block for the OS default connect timeout.
        if std::net::TcpStream::connect_timeout(&candidate, CONNECT_TIMEOUT).is_ok() {
            return true;
        }
    }

    false
}
0 commit comments