Skip to content

Commit 77c11a5

Browse files
committed
Fix error handling for worker
1 parent 138d24d commit 77c11a5

1 file changed

Lines changed: 6 additions & 4 deletions

File tree

gllm/worker.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -194,15 +194,17 @@ def run_other(self):
194194
self.forward_pp()
195195

196196
def handle_keyboardInterrupt(self):
197-
logger.info(f'Exit')
198-
dist.destroy_process_group()
199197
self.mp_alive[self.local_rank] = -1
198+
logger.info(f'Exit')
199+
if dist.is_initialized():
200+
dist.destroy_process_group()
200201

201202
def handle_exception(self, e):
203+
self.mp_alive[self.local_rank] = -1
202204
logger.error(e)
203205
traceback.print_exc()
204-
dist.destroy_process_group()
205-
self.mp_alive[self.local_rank] = -1
206+
if dist.is_initialized():
207+
dist.destroy_process_group()
206208

207209

208210
def run_worker(worker: Worker):

0 commit comments

Comments
 (0)