@@ -2599,7 +2599,7 @@ def _transition_memory_released(self, key: Key, stimulus_id: str) -> RecsMsgs:
25992599 if ts .who_wants :
26002600 ts .exception_blame = ts
26012601 ts .exception = Serialized (
2602- * serialize (ValueError ("Worker holding Actor was lost" ))
2602+ * serialize (RuntimeError ("Worker holding Actor was lost" ))
26032603 )
26042604 return {ts .key : "erred" }, {}, {} # don't try to recreate
26052605
@@ -2652,7 +2652,7 @@ def _transition_released_erred(self, key: Key, stimulus_id: str) -> RecsMsgs:
26522652
26532653 if self .validate :
26542654 assert ts .exception_blame
2655- assert not ts .who_has
2655+ assert not ts .who_has or ts . actor
26562656 assert not ts .waiting_on
26572657
26582658 failing_ts = ts .exception_blame
@@ -2772,8 +2772,8 @@ def _transition_processing_erred(
27722772 self ,
27732773 key : Key ,
27742774 stimulus_id : str ,
2775- worker : str ,
27762775 * ,
2776+ worker : str | None = None ,
27772777 cause : Key | None = None ,
27782778 exception : Serialized | None = None ,
27792779 traceback : Serialized | None = None ,
@@ -2988,6 +2988,45 @@ def _remove_key(self, key: Key) -> None:
29882988 ts .exception_blame = ts .exception = ts .traceback = None
29892989 self .task_metadata .pop (key , None )
29902990
def _transition_memory_erred(self, key: Key, stimulus_id: str) -> RecsMsgs:
    """Move an actor task from "memory" to "erred".

    All replicas of the task are dropped, every dependent that holds no
    replica of its own is blamed on this task, and the task is marked as
    erred with ``RuntimeError("Worker holding Actor was lost")``.

    Parameters
    ----------
    key
        Key of the task to transition; it must exist in ``self.tasks``.
    stimulus_id
        Identifier of the stimulus that triggered the transition. It is
        accepted for signature parity with the other transition methods
        and is not otherwise used here.

    Returns
    -------
    RecsMsgs
        Whatever ``self._propagate_erred`` returns for this task.

    Notes
    -----
    NOTE(review): this method used to assemble "free-keys" worker
    messages, "task-erred" client messages and "erred" recommendations
    for dependents, but none of them were ever returned -- only the
    result of ``_propagate_erred`` reached the caller. That dead code is
    removed here without changing what is emitted. Presumably
    ``_propagate_erred`` already notifies clients and dependents; confirm
    before relying on it.
    """
    ts = self.tasks[key]
    if self.validate:
        # This transition is only meaningful for actor tasks.
        assert ts.actor

    self.remove_all_replicas(ts)

    # Dependents without a replica of their own are blamed on this task.
    for dts in ts.dependents:
        if not dts.who_has:
            dts.exception_blame = ts

    exception = Serialized(
        *serialize(RuntimeError("Worker holding Actor was lost"))
    )

    ts.state = "erred"
    return self._propagate_erred(
        ts,
        cause=ts.key,
        exception=exception,
    )
3029+
29913030 def _transition_memory_forgotten (self , key : Key , stimulus_id : str ) -> RecsMsgs :
29923031 ts = self .tasks [key ]
29933032
@@ -3078,6 +3117,7 @@ def _transition_released_forgotten(self, key: Key, stimulus_id: str) -> RecsMsgs
30783117 ("no-worker" , "processing" ): _transition_no_worker_processing ,
30793118 ("no-worker" , "erred" ): _transition_no_worker_erred ,
30803119 ("released" , "forgotten" ): _transition_released_forgotten ,
3120+ ("memory" , "erred" ): _transition_memory_erred ,
30813121 ("memory" , "forgotten" ): _transition_memory_forgotten ,
30823122 ("erred" , "released" ): _transition_erred_released ,
30833123 ("memory" , "released" ): _transition_memory_released ,
@@ -5521,7 +5561,9 @@ async def remove_worker(
55215561
55225562 for ts in list (ws .has_what ):
55235563 self .remove_replica (ts , ws )
5524- if not ts .who_has :
5564+ if ts in ws .actors :
5565+ recommendations [ts .key ] = "erred"
5566+ elif not ts .who_has :
55255567 if ts .run_spec :
55265568 recompute_keys .add (ts .key )
55275569 recommendations [ts .key ] = "released"
0 commit comments