@@ -220,23 +220,26 @@ export class GatewayManager extends EventEmitter {
220220 this . setStatus ( { state : 'starting' , reconnectAttempts : 0 } ) ;
221221
222222 try {
223- // Check if Python environment is ready (self-healing)
224- const pythonReady = await isPythonReady ( ) ;
225- if ( ! pythonReady ) {
226- logger . info ( 'Python environment missing or incomplete, attempting background repair...' ) ;
227- // We don't await this to avoid blocking Gateway startup,
228- // as uv run will handle it if needed, but this pre-warms it.
229- void setupManagedPython ( ) . catch ( err => {
230- logger . error ( 'Background Python repair failed:' , err ) ;
231- } ) ;
232- }
223+ // Check if Python environment is ready (self-healing) asynchronously
224+ void isPythonReady ( ) . then ( pythonReady => {
225+ if ( ! pythonReady ) {
226+ logger . info ( 'Python environment missing or incomplete, attempting background repair...' ) ;
227+ // We don't await this to avoid blocking Gateway startup,
228+ // as uv run will handle it if needed, but this pre-warms it.
229+ void setupManagedPython ( ) . catch ( err => {
230+ logger . error ( 'Background Python repair failed:' , err ) ;
231+ } ) ;
232+ }
233+ } ) . catch ( err => {
234+ logger . error ( 'Failed to check Python environment:' , err ) ;
235+ } ) ;
233236
234237 // Check if Gateway is already running
235238 logger . debug ( 'Checking for existing Gateway...' ) ;
236239 const existing = await this . findExistingGateway ( ) ;
237240 if ( existing ) {
238241 logger . debug ( `Found existing Gateway on port ${ existing . port } ` ) ;
239- await this . connect ( existing . port ) ;
242+ await this . connect ( existing . port , existing . externalToken ) ;
240243 this . ownsProcess = false ;
241244 this . setStatus ( { pid : undefined } ) ;
242245 this . startHealthCheck ( ) ;
@@ -300,19 +303,39 @@ export class GatewayManager extends EventEmitter {
300303 // Kill process
301304 if ( this . process && this . ownsProcess ) {
302305 const child = this . process ;
303- logger . info ( `Sending SIGTERM to Gateway (pid=${ child . pid ?? 'unknown' } )` ) ;
304- child . kill ( 'SIGTERM' ) ;
305- // Force kill after timeout
306- setTimeout ( ( ) => {
307- if ( child . exitCode === null ) {
308- logger . warn ( `Gateway did not exit in time, sending SIGKILL (pid=${ child . pid ?? 'unknown' } )` ) ;
309- child . kill ( 'SIGKILL' ) ;
310- }
311- if ( this . process === child ) {
312- this . process = null ;
306+
307+ await new Promise < void > ( ( resolve ) => {
308+ // If process already exited, resolve immediately
309+ if ( child . exitCode !== null || child . signalCode !== null ) {
310+ return resolve ( ) ;
313311 }
314- } , 5000 ) ;
315- this . process = null ;
312+
313+ logger . info ( `Sending SIGTERM to Gateway (pid=${ child . pid ?? 'unknown' } )` ) ;
314+ child . kill ( 'SIGTERM' ) ;
315+
316+ // Force kill after timeout
317+ const timeout = setTimeout ( ( ) => {
318+ if ( child . exitCode === null && child . signalCode === null ) {
319+ logger . warn ( `Gateway did not exit in time, sending SIGKILL (pid=${ child . pid ?? 'unknown' } )` ) ;
320+ child . kill ( 'SIGKILL' ) ;
321+ }
322+ resolve ( ) ;
323+ } , 5000 ) ;
324+
325+ child . once ( 'exit' , ( ) => {
326+ clearTimeout ( timeout ) ;
327+ resolve ( ) ;
328+ } ) ;
329+
330+ child . once ( 'error' , ( ) => {
331+ clearTimeout ( timeout ) ;
332+ resolve ( ) ;
333+ } ) ;
334+ } ) ;
335+
336+ if ( this . process === child ) {
337+ this . process = null ;
338+ }
316339 }
317340 this . ownsProcess = false ;
318341
@@ -332,8 +355,6 @@ export class GatewayManager extends EventEmitter {
332355 async restart ( ) : Promise < void > {
333356 logger . debug ( 'Gateway restart requested' ) ;
334357 await this . stop ( ) ;
335- // Brief delay before restart
336- await new Promise ( resolve => setTimeout ( resolve , 1000 ) ) ;
337358 await this . start ( ) ;
338359 }
339360
@@ -445,11 +466,46 @@ export class GatewayManager extends EventEmitter {
445466 /**
446467 * Find existing Gateway process by attempting a WebSocket connection
447468 */
448- private async findExistingGateway ( ) : Promise < { port : number } | null > {
469+ private async findExistingGateway ( ) : Promise < { port : number , externalToken ?: string } | null > {
449470 try {
450471 const port = PORTS . OPENCLAW_GATEWAY ;
472+
473+ try {
474+ const { stdout } = await new Promise < { stdout : string } > ( ( resolve ) => {
475+ import ( 'child_process' ) . then ( cp => {
476+ cp . exec ( `lsof -i :${ port } | grep LISTEN` , ( err , stdout ) => {
477+ if ( err ) resolve ( { stdout : '' } ) ;
478+ else resolve ( { stdout } ) ;
479+ } ) ;
480+ } ) ;
481+ } ) ;
482+
483+ if ( stdout . trim ( ) ) {
484+ // A process is listening on the port
485+ const pids = stdout . split ( '\n' )
486+ . map ( line => line . trim ( ) . split ( / \s + / ) [ 1 ] )
487+ . filter ( pid => pid && pid !== 'PID' ) ;
488+
489+ if ( pids . length > 0 ) {
490+ // Try to kill it if it's not us to avoid connection issues
491+ // This happens frequently on HMR / dev reloads
492+ if ( ! this . process || ! pids . includes ( String ( this . process . pid ) ) ) {
493+ logger . info ( `Found orphaned process listening on port ${ port } (PID: ${ pids [ 0 ] } ), attempting to kill...` ) ;
494+ for ( const pid of pids ) {
495+ try { process . kill ( parseInt ( pid ) , 'SIGKILL' ) ; } catch { /* ignore */ }
496+ }
497+ // Wait a moment for port to be released
498+ await new Promise ( r => setTimeout ( r , 500 ) ) ;
499+ return null ; // Return null so we start a fresh instance
500+ }
501+ }
502+ }
503+ } catch ( err ) {
504+ logger . debug ( 'Error checking for existing process on port:' , err ) ;
505+ }
506+
451507 // Try a quick WebSocket connection to check if gateway is listening
452- return await new Promise < { port : number } | null > ( ( resolve ) => {
508+ return await new Promise < { port : number , externalToken ?: string } | null > ( ( resolve ) => {
453509 const testWs = new WebSocket ( `ws://localhost:${ port } /ws` ) ;
454510 const timeout = setTimeout ( ( ) => {
455511 testWs . close ( ) ;
@@ -675,12 +731,13 @@ export class GatewayManager extends EventEmitter {
675731 /**
676732 * Wait for Gateway to be ready by checking if the port is accepting connections
677733 */
678- private async waitForReady ( retries = 600 , interval = 1000 ) : Promise < void > {
734+ private async waitForReady ( retries = 2400 , interval = 250 ) : Promise < void > {
735+ const child = this . process ;
679736 for ( let i = 0 ; i < retries ; i ++ ) {
680737 // Early exit if the gateway process has already exited
681- if ( this . process && ( this . process . exitCode !== null || this . process . signalCode !== null ) ) {
682- const code = this . process . exitCode ;
683- const signal = this . process . signalCode ;
738+ if ( child && ( child . exitCode !== null || child . signalCode !== null ) ) {
739+ const code = child . exitCode ;
740+ const signal = child . signalCode ;
684741 logger . error ( `Gateway process exited before ready (${ this . formatExit ( code , signal ) } )` ) ;
685742 throw new Error ( `Gateway process exited before becoming ready (${ this . formatExit ( code , signal ) } )` ) ;
686743 }
@@ -727,9 +784,7 @@ export class GatewayManager extends EventEmitter {
727784 /**
728785 * Connect WebSocket to Gateway
729786 */
730- private async connect ( port : number ) : Promise < void > {
731- // Get token for WebSocket authentication
732- const gatewayToken = await getSetting ( 'gatewayToken' ) ;
787+ private async connect ( port : number , _externalToken ?: string ) : Promise < void > {
733788 logger . debug ( `Connecting Gateway WebSocket (ws://localhost:${ port } /ws)` ) ;
734789
735790 return new Promise ( ( resolve , reject ) => {
@@ -774,6 +829,9 @@ export class GatewayManager extends EventEmitter {
774829 this . ws . on ( 'open' , async ( ) => {
775830 logger . debug ( 'Gateway WebSocket opened, sending connect handshake' ) ;
776831
832+ // Re-fetch token here before generating payload just in case it updated while connecting
833+ const currentToken = await getSetting ( 'gatewayToken' ) ;
834+
777835 // Send proper connect handshake as required by OpenClaw Gateway protocol
778836 // The Gateway expects: { type: "req", id: "...", method: "connect", params: ConnectParams }
779837 // Since 2026.2.15, scopes are only granted when a signed device identity is included.
@@ -786,14 +844,15 @@ export class GatewayManager extends EventEmitter {
786844
787845 const device = ( ( ) => {
788846 if ( ! this . deviceIdentity ) return undefined ;
847+
789848 const payload = buildDeviceAuthPayload ( {
790849 deviceId : this . deviceIdentity . deviceId ,
791850 clientId,
792851 clientMode,
793852 role,
794853 scopes,
795854 signedAtMs,
796- token : gatewayToken ?? null ,
855+ token : currentToken ?? null ,
797856 } ) ;
798857 const signature = signDevicePayload ( this . deviceIdentity . privateKeyPem , payload ) ;
799858 return {
@@ -819,7 +878,7 @@ export class GatewayManager extends EventEmitter {
819878 mode : clientMode ,
820879 } ,
821880 auth : {
822- token : gatewayToken ,
881+ token : currentToken ,
823882 } ,
824883 caps : [ ] ,
825884 role,
@@ -1061,7 +1120,7 @@ export class GatewayManager extends EventEmitter {
10611120 // Try to find existing Gateway first
10621121 const existing = await this . findExistingGateway ( ) ;
10631122 if ( existing ) {
1064- await this . connect ( existing . port ) ;
1123+ await this . connect ( existing . port , existing . externalToken ) ;
10651124 this . ownsProcess = false ;
10661125 this . setStatus ( { pid : undefined } ) ;
10671126 this . reconnectAttempts = 0 ;
0 commit comments