19
19
from typing import Dict , Iterable , Optional
20
20
21
21
from tlz import first , keymap , merge , pluck # noqa: F401
22
- from tornado import gen
23
22
from tornado .ioloop import IOLoop , PeriodicCallback
24
23
25
24
import dask
@@ -438,6 +437,7 @@ def __init__(
438
437
439
438
self .active_threads_lock = threading .Lock ()
440
439
self .active_threads = dict ()
440
+ self .active_keys = set ()
441
441
self .profile_keys = defaultdict (profile .create )
442
442
self .profile_keys_history = deque (maxlen = 3600 )
443
443
self .profile_recent = profile .create ()
@@ -968,16 +968,14 @@ async def heartbeat(self):
968
968
logger .debug ("Heartbeat: %s" , self .address )
969
969
try :
970
970
start = time ()
971
- with self .active_threads_lock :
972
- active_keys = list (self .active_threads .values ())
973
971
response = await retry_operation (
974
972
self .scheduler .heartbeat_worker ,
975
973
address = self .contact_address ,
976
974
now = start ,
977
975
metrics = await self .get_metrics (),
978
976
executing = {
979
977
key : start - self .tasks [key ].start_time
980
- for key in active_keys
978
+ for key in self . active_keys
981
979
if key in self .tasks
982
980
},
983
981
)
@@ -2686,41 +2684,6 @@ def release_key(
2686
2684
# Execute Task #
2687
2685
################
2688
2686
2689
# FIXME: this breaks if changed to async def...
# xref: https://github.com/dask/distributed/issues/3938
@gen.coroutine
def executor_submit(self, key, function, args=(), kwargs=None, executor=None):
    """Run ``function(*args, **kwargs)`` in an executor and wait for it.

    Waiting on concurrent.futures futures from inside tornado has caused
    trouble in the past, so the submission, the periodic state logging and
    the timing bookkeeping are kept together in this one method.

    Parameters
    ----------
    key:
        Task key; used for the debug log line and to look the task up in
        ``self.tasks``.
    function:
        Callable handed to the executor.
    args, kwargs:
        Positional / keyword arguments for ``function``; a falsy ``kwargs``
        is treated as an empty dict.
    executor:
        Executor to submit to; falls back to ``self.executors["default"]``
        when falsy.

    Returns
    -------
    Whatever ``future.result()`` yields once the submitted call finishes
    (delivered through ``gen.Return``).
    """
    pool = executor or self.executors["default"]
    job_counter[0] += 1
    call_kwargs = kwargs or {}
    future = pool.submit(function, *args, **call_kwargs)

    def _report_state():
        # Debug aid: shows whether the future is still pending/running.
        logger.debug("future state: %s - %s", key, future._state)

    watcher = PeriodicCallback(_report_state, 1000)

    # The task may be unknown to this worker; only record timings if present.
    ts = self.tasks.get(key)
    if ts is not None:
        ts.start_time = time()

    watcher.start()
    try:
        yield future
    finally:
        # Always stop the logger callback and stamp the stop time, even when
        # waiting on the future raised.
        watcher.stop()
        if ts is not None:
            ts.stop_time = time()

    raise gen.Return(future.result())
2723
-
2724
2687
def run(self, comm, function, args=(), wait=True, kwargs=None):
    """Forward the call to the module-level ``run`` helper.

    This object is passed as the helper's first argument, followed by
    ``comm``; ``function``, ``args``, ``kwargs`` and ``wait`` are forwarded
    by keyword. The helper's return value is returned unchanged.
    """
    forwarded = dict(function=function, args=args, kwargs=kwargs, wait=wait)
    return run(self, comm, **forwarded)
2726
2689
@@ -2782,19 +2745,16 @@ async def actor_execute(
2782
2745
if iscoroutinefunction (func ):
2783
2746
result = await func (* args , ** kwargs )
2784
2747
elif separate_thread :
2785
- result = await self .executor_submit (
2786
- name ,
2748
+ result = await self .loop . run_in_executor (
2749
+ self . executors [ "actor" ] ,
2787
2750
apply_function_actor ,
2788
- args = (
2789
- func ,
2790
- args ,
2791
- kwargs ,
2792
- self .execution_state ,
2793
- name ,
2794
- self .active_threads ,
2795
- self .active_threads_lock ,
2796
- ),
2797
- executor = self .executors ["actor" ],
2751
+ func ,
2752
+ args ,
2753
+ kwargs ,
2754
+ self .execution_state ,
2755
+ name ,
2756
+ self .active_threads ,
2757
+ self .active_threads_lock ,
2798
2758
)
2799
2759
else :
2800
2760
result = func (* args , ** kwargs )
@@ -2946,11 +2906,14 @@ async def execute(self, key, report=False):
2946
2906
executor ,
2947
2907
) # TODO: comment out?
2948
2908
assert key == ts .key
2909
+ self .active_keys .add (ts .key )
2949
2910
try :
2950
- result = await self .executor_submit (
2951
- ts .key ,
2952
- apply_function ,
2953
- args = (
2911
+ e = self .executors [executor ]
2912
+ ts .start_time = time ()
2913
+ if "ThreadPoolExecutor" in str (type (e )):
2914
+ result = await self .loop .run_in_executor (
2915
+ e ,
2916
+ apply_function ,
2954
2917
function ,
2955
2918
args2 ,
2956
2919
kwargs2 ,
@@ -2959,12 +2922,32 @@ async def execute(self, key, report=False):
2959
2922
self .active_threads ,
2960
2923
self .active_threads_lock ,
2961
2924
self .scheduler_delay ,
2962
- ),
2963
- executor = self .executors [executor ],
2964
- )
2925
+ )
2926
+ else :
2927
+ try :
2928
+ start = time () + self .scheduler_delay
2929
+ result = await self .loop .run_in_executor (
2930
+ e ,
2931
+ apply_function_simple ,
2932
+ function ,
2933
+ args2 ,
2934
+ kwargs2 ,
2935
+ self .scheduler_delay ,
2936
+ )
2937
+ except BaseException as e :
2938
+ msg = error_message (e )
2939
+ msg ["op" ] = "task-erred"
2940
+ msg ["actual-exception" ] = e
2941
+ msg ["start" ] = start
2942
+ msg ["stop" ] = time () + self .scheduler_delay
2943
+ msg ["thread" ] = None
2944
+ result = msg
2945
+
2965
2946
except RuntimeError as e :
2966
2947
executor_error = e
2967
2948
raise
2949
+ finally :
2950
+ self .active_keys .discard (ts .key )
2968
2951
2969
2952
# We'll need to check again for the task state since it may have
2970
2953
# changed since the execution was kicked off. In particular, it may
@@ -3854,6 +3837,27 @@ def apply_function(
3854
3837
thread_state .start_time = time ()
3855
3838
thread_state .execution_state = execution_state
3856
3839
thread_state .key = key
3840
+
3841
+ msg = apply_function_simple (function , args , kwargs , time_delay )
3842
+
3843
+ with active_threads_lock :
3844
+ del active_threads [ident ]
3845
+ return msg
3846
+
3847
+
3848
+ def apply_function_simple (
3849
+ function ,
3850
+ args ,
3851
+ kwargs ,
3852
+ time_delay ,
3853
+ ):
3854
+ """Run a function, collect information
3855
+
3856
+ Returns
3857
+ -------
3858
+ msg: dictionary with status, result/error, timings, etc.
3859
+ """
3860
+ ident = threading .get_ident ()
3857
3861
start = time ()
3858
3862
try :
3859
3863
result = function (* args , ** kwargs )
@@ -3874,8 +3878,6 @@ def apply_function(
3874
3878
msg ["start" ] = start + time_delay
3875
3879
msg ["stop" ] = end + time_delay
3876
3880
msg ["thread" ] = ident
3877
- with active_threads_lock :
3878
- del active_threads [ident ]
3879
3881
return msg
3880
3882
3881
3883
@@ -4020,9 +4022,8 @@ async def run(server, comm, function, args=(), kwargs=None, is_coro=None, wait=T
4020
4022
pass
4021
4023
else :
4022
4024
4023
- @gen .coroutine
4024
- def gpu_metric (worker ):
4025
- result = yield offload (nvml .real_time )
4025
async def gpu_metric(worker):
    """Return the awaited result of ``offload(nvml.real_time)``.

    ``worker`` is accepted for the metric-callback signature but is not
    used here.
    """
    return await offload(nvml.real_time)
4027
4028
4028
4029
DEFAULT_METRICS ["gpu" ] = gpu_metric
0 commit comments