@@ -106,6 +106,7 @@ function startServer() {
106106 } , 3000 ) ;
107107
108108 logger . info ( 'start master....' ) ;
109+ logger . info ( `config from: ${ config . configFrom } ` ) ;
109110 logger . info ( 'version node: ${node}, modules: ${modules}' , process . versions ) ;
110111
111112 if ( serverOS . isLinux ) {
@@ -140,6 +141,7 @@ function startServer() {
140141
141142 process . title = 'TSW/worker/node' ;
142143 logger . info ( 'start worker....' ) ;
144+ logger . info ( `config from: ${ config . configFrom } ` ) ;
143145 require ( './http.proxy.js' ) ;
144146 require ( 'runtime/jank.watcher.js' ) ;
145147
@@ -181,28 +183,27 @@ function closeWorker(worker) {
181183 closeTimeWait = Math . max ( closeTimeWait , config . timeout . keepAlive ) ;
182184 closeTimeWait = Math . min ( 60000 , closeTimeWait ) || 10000 ;
183185
184- if ( worker . isClosing ) {
186+ if ( worker . exitedAfterDisconnect ) {
187+ logger . info ( 'worker.exitedAfterDisconnect is true' ) ;
185188 return ;
186189 }
187190
188- worker . isClosing = true ;
189-
190191 if ( workerMap [ cpu ] === worker ) {
191192 delete workerMap [ cpu ] ;
192193 }
193194
194195 const closeFn = ( function ( worker ) {
195196 let closed = false ;
196- const pid = worker . process . pid ;
197197
198198 return function ( ) {
199199 if ( closed ) {
200200 return ;
201201 }
202202 try {
203- process . kill ( pid , 9 ) ;
203+ worker . kill ( 9 ) ;
204+ logger . info ( `worker/${ worker . cpuid } ${ worker . process . pid } has killed` ) ;
204205 } catch ( e ) {
205- logger . info ( `kill worker message: ${ e . message } ` ) ;
206+ logger . info ( `worker/ ${ worker . cpuid } kill message: ${ e . message } ` ) ;
206207 }
207208
208209 closed = true ;
@@ -215,7 +216,7 @@ function closeWorker(worker) {
215216 try {
216217 worker . disconnect ( closeFn ) ;
217218 } catch ( e ) {
218- logger . info ( e . stack ) ;
219+ logger . info ( `worker disconnect message: ${ e . message } ` ) ;
219220 }
220221}
221222
@@ -225,19 +226,16 @@ function restartWorker(worker) {
225226 return ;
226227 }
227228
228- worker . hasRestart = true ;
229229 const cpu = getToBindCpu ( worker ) ;
230-
231- cpuMap [ cpu ] = 0 ;
232-
233230 logger . info ( 'worker${cpu} pid=${pid} closed. restart new worker again.' , {
234231 pid : worker . process . pid ,
235232 cpu : cpu
236233 } ) ;
237234
238235 cluster . fork ( process . env ) . cpuid = cpu ;
239-
240236 closeWorker ( worker ) ;
237+
238+ worker . hasRestart = true ;
241239}
242240
243241// 定时检测子进程存活,15秒未响应的采取措施
@@ -261,7 +259,7 @@ function checkWorkerAlive() {
261259 }
262260
263261 // 无响应进程处理
264- if ( now - worker . lastLiveTime > checkWorkerAliveTimeout * 3 && cpuMap [ cpuid ] === 1 ) {
262+ if ( now - worker . lastLiveTime > checkWorkerAliveTimeout * 3 ) {
265263
266264 logger . error ( 'worker${cpu} pid=${pid} miss heartBeat, kill it' , {
267265 pid : worker . process . pid ,
@@ -398,7 +396,6 @@ function masterEventHandler() {
398396 }
399397
400398 workerMap [ cpu ] = currWorker ;
401- cpuMap [ cpu ] = 1 ;
402399
403400 // 监听子进程发来的消息并处理
404401 currWorker . on ( 'message' , function ( ...args ) {
@@ -421,11 +418,7 @@ function masterEventHandler() {
421418 cluster . on ( 'disconnect' , function ( worker ) {
422419 const cpu = getToBindCpu ( worker ) ;
423420
424- if ( worker . hasRestart ) {
425- return ;
426- }
427-
428- logger . info ( 'worker${cpu} pid=${pid} disconnect event fired. restart new worker again.' , {
421+ logger . info ( 'worker${cpu} pid=${pid} disconnect event fired.' , {
429422 pid : worker . process . pid ,
430423 cpu : cpu
431424 } ) ;
@@ -438,11 +431,7 @@ function masterEventHandler() {
438431
439432 const cpu = getToBindCpu ( worker ) ;
440433
441- if ( worker . hasRestart ) {
442- return ;
443- }
444-
445- logger . info ( 'worker${cpu} pid=${pid} exit event fired. restart new worker again.' , {
434+ logger . info ( 'worker${cpu} pid=${pid} exit event fired.' , {
446435 pid : worker . process . pid ,
447436 cpu : cpu
448437 } ) ;
@@ -451,7 +440,7 @@ function masterEventHandler() {
451440 } ) ;
452441
453442 process . on ( 'reload' , function ( GET ) {
454- logger . info ( 'reload' ) ;
443+ logger . info ( 'reload event fired. ' ) ;
455444 tnm2 . Attr_API ( 'SUM_TSW_WORKER_RELOAD' , 1 ) ;
456445
457446 for ( const key in workerMap ) {
@@ -471,8 +460,15 @@ function masterEventHandler() {
471460 logger . info ( 'cpu${cpu} send restart message' , {
472461 cpu : cpu
473462 } ) ;
474- worker . send ( { from : 'master' , cmd : 'restart' } ) ;
463+ try {
464+ worker . send ( { from : 'master' , cmd : 'restart' } ) ;
465+ } catch ( e ) {
466+ logger . info ( 'cpu${cpu} send restart to worker, error message: ${e.message} while' , {
467+ cpu : cpu
468+ } ) ;
469+ }
475470 }
471+
476472 restartWorker ( worker ) ;
477473 } ;
478474 } ) ( worker , cpu ) , timeout ) ;
0 commit comments