1 file changed: +17 −11 lines changed
Original file line number | Diff line number | Diff line change
@@ -360,7 +360,15 @@ async def cancel_quantization(self, task_id: str) -> bool:
360360
361361 shutil .rmtree (output , ignore_errors = True )
362362
363- # Clean up GPU state to prevent Metal errors on next task
363+ # Clean up GPU state to prevent Metal errors on next task.
364+ # asyncio.Task.cancel() doesn't stop the to_thread immediately —
365+ # the thread may still have in-flight Metal commands. Wait for the
366+ # thread to actually finish before touching Metal state.
367+ if active_task :
368+ try :
369+ await asyncio .wait_for (asyncio .shield (active_task ), timeout = 10.0 )
370+ except (asyncio .CancelledError , asyncio .TimeoutError , Exception ):
371+ pass
364372 if HAS_MLX :
365373 try :
366374 mx .synchronize ()
@@ -412,17 +420,15 @@ async def _run_quantization(self, task_id: str) -> None:
412420 return
413421
414422 # Ensure GPU is clean before starting (previous task may have been cancelled)
415- # Metal needs time to fully release command buffers after cancellation
423+ # Metal command buffers need full sync + cache clear after cancellation
416424 if HAS_MLX :
417- try :
418- mx .synchronize ()
419- except Exception :
420- pass
421- await asyncio .sleep (2.0 )
422- try :
423- mx .clear_cache ()
424- except Exception :
425- pass
425+ for _ in range (3 ):
426+ try :
427+ mx .synchronize ()
428+ mx .clear_cache ()
429+ break
430+ except Exception :
431+ await asyncio .sleep (1.0 )
426432
427433 # Phase 1: Loading
428434 task .status = QuantStatus .LOADING
You can’t perform that action at this time.
0 commit comments