@@ -229,23 +229,26 @@ export class GatewayManager extends EventEmitter {
229229 this . setStatus ( { state : 'starting' , reconnectAttempts : 0 } ) ;
230230
231231 try {
232- // Check if Python environment is ready (self-healing)
233- const pythonReady = await isPythonReady ( ) ;
234- if ( ! pythonReady ) {
235- logger . info ( 'Python environment missing or incomplete, attempting background repair...' ) ;
236- // We don't await this to avoid blocking Gateway startup,
237- // as uv run will handle it if needed, but this pre-warms it.
238- void setupManagedPython ( ) . catch ( err => {
239- logger . error ( 'Background Python repair failed:' , err ) ;
240- } ) ;
241- }
232+ // Check if Python environment is ready (self-healing) asynchronously
233+ void isPythonReady ( ) . then ( pythonReady => {
234+ if ( ! pythonReady ) {
235+ logger . info ( 'Python environment missing or incomplete, attempting background repair...' ) ;
236+ // We don't await this to avoid blocking Gateway startup,
237+ // as uv run will handle it if needed, but this pre-warms it.
238+ void setupManagedPython ( ) . catch ( err => {
239+ logger . error ( 'Background Python repair failed:' , err ) ;
240+ } ) ;
241+ }
242+ } ) . catch ( err => {
243+ logger . error ( 'Failed to check Python environment:' , err ) ;
244+ } ) ;
242245
243246 // Check if Gateway is already running
244247 logger . debug ( 'Checking for existing Gateway...' ) ;
245248 const existing = await this . findExistingGateway ( ) ;
246249 if ( existing ) {
247250 logger . debug ( `Found existing Gateway on port ${ existing . port } ` ) ;
248- await this . connect ( existing . port ) ;
251+ await this . connect ( existing . port , existing . externalToken ) ;
249252 this . ownsProcess = false ;
250253 this . setStatus ( { pid : undefined } ) ;
251254 this . startHealthCheck ( ) ;
@@ -318,33 +321,39 @@ export class GatewayManager extends EventEmitter {
318321 // Kill process and wait for it to actually exit (so the port is released)
319322 if ( this . process && this . ownsProcess ) {
320323 const child = this . process ;
321- this . process = null ;
322- logger . info ( `Sending SIGTERM to Gateway (pid=${ child . pid ?? 'unknown' } )` ) ;
323- child . kill ( 'SIGTERM' ) ;
324-
325- // Wait for the process to exit, with a SIGKILL fallback
324+
326325 await new Promise < void > ( ( resolve ) => {
327- const killTimer = setTimeout ( ( ) => {
326+ // If process already exited, resolve immediately
327+ if ( child . exitCode !== null || child . signalCode !== null ) {
328+ return resolve ( ) ;
329+ }
330+
331+ logger . info ( `Sending SIGTERM to Gateway (pid=${ child . pid ?? 'unknown' } )` ) ;
332+ child . kill ( 'SIGTERM' ) ;
333+
334+ // Force kill after timeout
335+ const timeout = setTimeout ( ( ) => {
328336 if ( child . exitCode === null && child . signalCode === null ) {
329337 logger . warn ( `Gateway did not exit in time, sending SIGKILL (pid=${ child . pid ?? 'unknown' } )` ) ;
330- try { child . kill ( 'SIGKILL' ) ; } catch { /* already dead */ }
338+ child . kill ( 'SIGKILL' ) ;
331339 }
332- } , 3000 ) ;
333-
334- const done = ( ) => {
335- clearTimeout ( killTimer ) ;
336340 resolve ( ) ;
337- } ;
341+ } , 5000 ) ;
338342
339- // If the process already exited before we got here
340- if ( child . exitCode !== null || child . signalCode !== null ) {
341- done ( ) ;
342- return ;
343- }
344- child . once ( 'exit' , done ) ;
345- // Safety cap: resolve after 6s even if exit event never fires
346- setTimeout ( done , 6000 ) ;
343+ child . once ( 'exit' , ( ) => {
344+ clearTimeout ( timeout ) ;
345+ resolve ( ) ;
346+ } ) ;
347+
348+ child . once ( 'error' , ( ) => {
349+ clearTimeout ( timeout ) ;
350+ resolve ( ) ;
351+ } ) ;
347352 } ) ;
353+
354+ if ( this . process === child ) {
355+ this . process = null ;
356+ }
348357 }
349358 this . ownsProcess = false ;
350359
@@ -364,18 +373,6 @@ export class GatewayManager extends EventEmitter {
364373 async restart ( ) : Promise < void > {
365374 logger . debug ( 'Gateway restart requested' ) ;
366375 await this . stop ( ) ;
367- // Wait for any in-flight start() to finish unwinding after abort
368- const maxWait = 10 ;
369- for ( let i = 0 ; i < maxWait && this . startLock ; i ++ ) {
370- await new Promise ( resolve => setTimeout ( resolve , 500 ) ) ;
371- }
372- if ( this . startLock ) {
373- logger . warn ( 'Gateway restart: startLock still held after waiting, forcing release' ) ;
374- this . startLock = false ;
375- this . startAbort = null ;
376- }
377- // Brief delay before restart
378- await new Promise ( resolve => setTimeout ( resolve , 500 ) ) ;
379376 await this . start ( ) ;
380377 }
381378
@@ -487,11 +484,46 @@ export class GatewayManager extends EventEmitter {
487484 /**
488485 * Find existing Gateway process by attempting a WebSocket connection
489486 */
490- private async findExistingGateway ( ) : Promise < { port : number } | null > {
487+ private async findExistingGateway ( ) : Promise < { port : number , externalToken ?: string } | null > {
491488 try {
492489 const port = PORTS . OPENCLAW_GATEWAY ;
490+
491+ try {
492+ const { stdout } = await new Promise < { stdout : string } > ( ( resolve ) => {
493+ import ( 'child_process' ) . then ( cp => {
494+ cp . exec ( `lsof -i :${ port } | grep LISTEN` , ( err , stdout ) => {
495+ if ( err ) resolve ( { stdout : '' } ) ;
496+ else resolve ( { stdout } ) ;
497+ } ) ;
498+ } ) ;
499+ } ) ;
500+
501+ if ( stdout . trim ( ) ) {
502+ // A process is listening on the port
503+ const pids = stdout . split ( '\n' )
504+ . map ( line => line . trim ( ) . split ( / \s + / ) [ 1 ] )
505+ . filter ( pid => pid && pid !== 'PID' ) ;
506+
507+ if ( pids . length > 0 ) {
508+ // Try to kill it if it's not us to avoid connection issues
509+ // This happens frequently on HMR / dev reloads
510+ if ( ! this . process || ! pids . includes ( String ( this . process . pid ) ) ) {
511+ logger . info ( `Found orphaned process listening on port ${ port } (PID: ${ pids [ 0 ] } ), attempting to kill...` ) ;
512+ for ( const pid of pids ) {
513+ try { process . kill ( parseInt ( pid ) , 'SIGKILL' ) ; } catch { /* ignore */ }
514+ }
515+ // Wait a moment for port to be released
516+ await new Promise ( r => setTimeout ( r , 500 ) ) ;
517+ return null ; // Return null so we start a fresh instance
518+ }
519+ }
520+ }
521+ } catch ( err ) {
522+ logger . debug ( 'Error checking for existing process on port:' , err ) ;
523+ }
524+
493525 // Try a quick WebSocket connection to check if gateway is listening
494- return await new Promise < { port : number } | null > ( ( resolve ) => {
526+ return await new Promise < { port : number , externalToken ?: string } | null > ( ( resolve ) => {
495527 const testWs = new WebSocket ( `ws://localhost:${ port } /ws` ) ;
496528 const timeout = setTimeout ( ( ) => {
497529 testWs . close ( ) ;
@@ -717,8 +749,10 @@ export class GatewayManager extends EventEmitter {
717749 /**
718750 * Wait for Gateway to be ready by checking if the port is accepting connections
719751 */
720- private async waitForReady ( retries = 600 , interval = 1000 ) : Promise < void > {
752+ private async waitForReady ( retries = 2400 , interval = 250 ) : Promise < void > {
753+ const child = this . process ;
721754 for ( let i = 0 ; i < retries ; i ++ ) {
755+ < < < << << HEAD
722756 // Abort if stop() was called while we are still waiting
723757 if ( this . startAbort ?. signal . aborted ) {
724758 logger . info ( 'waitForReady aborted by stop request' ) ;
@@ -733,6 +767,12 @@ export class GatewayManager extends EventEmitter {
733767 if ( this . process . exitCode !== null || this . process . signalCode !== null ) {
734768 const code = this. process . exitCode ;
735769 const signal = this. process . signalCode ;
770+ === === =
771+ // Early exit if the gateway process has already exited
772+ if ( child && ( child . exitCode !== null || child . signalCode !== null ) ) {
773+ const code = child . exitCode ;
774+ const signal = child . signalCode ;
775+ >>> > >>> upstream / main
736776 logger . error ( `Gateway process exited before ready (${ this . formatExit ( code , signal ) } )` ) ;
737777 throw new Error ( `Gateway process exited before becoming ready (${ this . formatExit ( code , signal ) } )` ) ;
738778 }
@@ -779,9 +819,7 @@ export class GatewayManager extends EventEmitter {
779819 /**
780820 * Connect WebSocket to Gateway
781821 */
782- private async connect ( port : number ) : Promise < void > {
783- // Get token for WebSocket authentication
784- const gatewayToken = await getSetting ( 'gatewayToken' ) ;
822+ private async connect ( port : number , _externalToken ?: string) : Promise < void > {
785823 logger . debug ( `Connecting Gateway WebSocket (ws://localhost:${ port } /ws)` ) ;
786824
787825 return new Promise ( ( resolve , reject ) => {
@@ -826,6 +864,9 @@ export class GatewayManager extends EventEmitter {
826864 this . ws . on ( 'open' , async ( ) => {
827865 logger . debug ( 'Gateway WebSocket opened, sending connect handshake' ) ;
828866
867+ // Re-fetch token here before generating payload just in case it updated while connecting
868+ const currentToken = await getSetting ( 'gatewayToken' ) ;
869+
829870 // Send proper connect handshake as required by OpenClaw Gateway protocol
830871 // The Gateway expects: { type: "req", id: "...", method: "connect", params: ConnectParams }
831872 // Since 2026.2.15, scopes are only granted when a signed device identity is included.
@@ -838,14 +879,15 @@ export class GatewayManager extends EventEmitter {
838879
839880 const device = ( ( ) => {
840881 if ( ! this . deviceIdentity ) return undefined ;
882+
841883 const payload = buildDeviceAuthPayload ( {
842884 deviceId : this . deviceIdentity . deviceId ,
843885 clientId,
844886 clientMode,
845887 role,
846888 scopes,
847889 signedAtMs,
848- token : gatewayToken ?? null ,
890+ token : currentToken ?? null ,
849891 } ) ;
850892 const signature = signDevicePayload ( this . deviceIdentity . privateKeyPem , payload ) ;
851893 return {
@@ -871,7 +913,7 @@ export class GatewayManager extends EventEmitter {
871913 mode : clientMode ,
872914 } ,
873915 auth : {
874- token : gatewayToken ,
916+ token : currentToken ,
875917 } ,
876918 caps : [ ] ,
877919 role,
@@ -1113,7 +1155,7 @@ export class GatewayManager extends EventEmitter {
11131155 // Try to find existing Gateway first
11141156 const existing = await this . findExistingGateway ( ) ;
11151157 if ( existing ) {
1116- await this . connect ( existing . port ) ;
1158+ await this . connect ( existing . port , existing . externalToken ) ;
11171159 this . ownsProcess = false ;
11181160 this . setStatus ( { pid : undefined } ) ;
11191161 this . reconnectAttempts = 0 ;
0 commit comments