1
1
import asyncio
2
+ import queue
2
3
import threading
3
4
from contextlib import contextmanager
4
5
from datetime import timedelta
@@ -36,8 +37,13 @@ def cancel(self) -> None:
36
37
37
38
class _TimeoutManager :
38
39
"""
39
- This class manages timeouts for futures. It uses a background thread with an
40
- event loop to schedule the timeouts.
40
+ This class manages timeouts for code blocks, futures and CUDA events. It
41
+ uses a background thread with an event loop to schedule the timeouts and
42
+ call the callback function when the timeout is reached.
43
+
44
+ Generally there is a single instance of this class that is used for all
45
+ timeouts. The callbacks should not block otherwise other timeouts may not
46
+ be processed.
41
47
"""
42
48
43
49
def __init__ (self ) -> None :
@@ -46,6 +52,10 @@ def __init__(self) -> None:
46
52
self ._event_loop_thread : Optional [threading .Thread ] = None
47
53
self ._next_timer_id = 0
48
54
55
+ # This queue is used to delete events on the main thread as cudaEventDestroy
56
+ # can block if the CUDA queue is full.
57
+ self ._del_queue : queue .SimpleQueue [object ] = queue .SimpleQueue ()
58
+
49
59
def _maybe_start_event_loop (self ) -> asyncio .AbstractEventLoop :
50
60
"""
51
61
Start the event loop if it has not already been started.
@@ -82,6 +92,8 @@ def register(self, fut: Future[T], timeout: timedelta) -> Future[T]:
82
92
if isinstance (fut , Mock ):
83
93
return fut
84
94
95
+ self ._clear_del_queue ()
96
+
85
97
loop = self ._maybe_start_event_loop ()
86
98
87
99
# pyre-fixme[29]: Future is not a function
@@ -114,6 +126,8 @@ def callback(fut: Future[T]) -> None:
114
126
return timed_fut
115
127
116
128
def stream_timeout (self , callback : Callable [[], None ], timeout : timedelta ) -> None :
129
+ self ._clear_del_queue ()
130
+
117
131
loop = self ._maybe_start_event_loop ()
118
132
119
133
event : torch .cuda .Event = torch .cuda .Event ()
@@ -123,6 +137,11 @@ def handler() -> None:
123
137
if not event .query ():
124
138
callback ()
125
139
140
+ # cudaEventDestroy can block so we never want to delete in the event
141
+ # loop. Put it on the del queue so we can delete it in the main
142
+ # thread.
143
+ self ._del_queue .put (event )
144
+
126
145
loop .call_soon_threadsafe (
127
146
self ._register_callback , loop , handler , timeout , _TimerHandle ()
128
147
)
@@ -145,6 +164,8 @@ def _register_callback(
145
164
def context_timeout (
146
165
self , callback : Callable [[], None ], timeout : timedelta
147
166
) -> Generator [None , None , None ]:
167
+ self ._clear_del_queue ()
168
+
148
169
loop = self ._maybe_start_event_loop ()
149
170
handle = _TimerHandle ()
150
171
@@ -156,6 +177,17 @@ def context_timeout(
156
177
157
178
handle .cancel ()
158
179
180
+ def _clear_del_queue (self ) -> None :
181
+ """
182
+ Clear the queue of futures to be deleted.
183
+ """
184
+ while True :
185
+ try :
186
+ # get and immediately discard item
187
+ self ._del_queue .get_nowait ()
188
+ except queue .Empty :
189
+ break
190
+
159
191
160
192
# Shared _TimeoutManager instance; generally a single instance is used for
# all timeouts, so callbacks registered on it should not block.
_TIMEOUT_MANAGER = _TimeoutManager ()
161
193
0 commit comments