1+ from __future__ import annotations # allow newer type syntax until 3.10 is our minimum
2+
13import codecs
24import json
35import os
68import tempfile
79import uuid
810import traceback
9- try :
10- from collections .abc import Mapping
11- except ImportError :
12- from collections import Mapping
1311
1412import ansible_runner
1513from ansible_runner .exceptions import ConfigurationError
1614from ansible_runner .loader import ArtifactLoader
1715import ansible_runner .plugins
1816from ansible_runner .utils import register_for_cleanup
1917from ansible_runner .utils .streaming import stream_dir , unstream_dir
18+ from collections .abc import Mapping
19+ from functools import wraps
20+ from threading import Event , RLock , Thread
2021
2122
2223class UUIDEncoder (json .JSONEncoder ):
@@ -38,6 +39,9 @@ def __init__(self, _output=None, **kwargs):
3839 self ._output = _output
3940 self .private_data_dir = os .path .abspath (kwargs .pop ('private_data_dir' ))
4041 self .only_transmit_kwargs = kwargs .pop ('only_transmit_kwargs' , False )
42+ if 'keepalive_seconds' in kwargs :
43+ kwargs .pop ('keepalive_seconds' ) # don't confuse older runners with this Worker-only arg
44+
4145 self .kwargs = kwargs
4246
4347 self .status = "unstarted"
@@ -60,12 +64,22 @@ def run(self):
6064 return self .status , self .rc
6165
6266
63- class Worker ( object ) :
64- def __init__ (self , _input = None , _output = None , ** kwargs ):
67+ class Worker :
68+ def __init__ (self , _input = None , _output = None , keepalive_seconds : float | None = None , ** kwargs ):
6569 if _input is None :
6670 _input = sys .stdin .buffer
6771 if _output is None :
6872 _output = sys .stdout .buffer
73+
74+ if keepalive_seconds is None : # if we didn't get an explicit int value, fall back to envvar
75+ # FIXME: emit/log a warning and silently continue if this value won't parse
76+ keepalive_seconds = float (os .environ .get ('ANSIBLE_RUNNER_KEEPALIVE_SECONDS' , 0 ))
77+
78+ self ._keepalive_interval_sec = keepalive_seconds
79+ self ._keepalive_thread : Thread | None = None
80+ self ._output_event = Event ()
81+ self ._output_lock = RLock ()
82+
6983 self ._input = _input
7084 self ._output = _output
7185
@@ -81,6 +95,64 @@ def __init__(self, _input=None, _output=None, **kwargs):
8195 self .status = "unstarted"
8296 self .rc = None
8397
98+ def _begin_keepalive (self ):
99+ """Starts a keepalive thread at most once"""
100+ if not self ._keepalive_thread :
101+ self ._keepalive_thread = Thread (target = self ._keepalive_loop , daemon = True )
102+ self ._keepalive_thread .start ()
103+
104+ def _end_keepalive (self ):
105+ """Disable the keepalive interval and notify the keepalive thread to shut down"""
106+ self ._keepalive_interval_sec = 0
107+ self ._output_event .set ()
108+
109+ def _keepalive_loop (self ):
110+ """Main loop for keepalive injection thread; exits when keepalive interval is <= 0"""
111+ while self ._keepalive_interval_sec > 0 :
112+ # block until output has occurred or keepalive interval elapses
113+ if self ._output_event .wait (timeout = self ._keepalive_interval_sec ):
114+ # output was sent before keepalive timeout; reset the event and start waiting again
115+ self ._output_event .clear ()
116+ continue
117+
118+ # keepalive interval elapsed; try to send a keepalive...
119+ # pre-acquire the output lock without blocking
120+ if not self ._output_lock .acquire (blocking = False ):
121+ # something else has the lock; output is imminent, so just skip this keepalive
122+ # NB: a long-running operation under an event handler that's holding this lock but not actually moving
123+ # output could theoretically block keepalives long enough to cause problems, but it's probably not
124+ # worth the added locking hassle to be pedantic about it
125+ continue
126+
127+ try :
128+ # were keepalives recently disabled?
129+ if self ._keepalive_interval_sec <= 0 :
130+ # we're probably shutting down; don't risk corrupting output by writing now, just bail out
131+ return
132+ # output a keepalive event
133+ # FIXME: this could be a lot smaller (even just `{}`) if a short-circuit discard was guaranteed in
134+ # Processor or if other layers were more defensive about missing event keys and/or unknown dictionary
135+ # values...
136+ self .event_handler (dict (event = 'keepalive' , counter = 0 , uuid = 0 ))
137+ finally :
138+ # always release the output lock (
139+ self ._output_lock .release ()
140+
141+ def _synchronize_output_reset_keepalive (wrapped_method ):
142+ """
143+ Utility decorator to synchronize event writes and flushes to avoid keepalives splatting in the middle of
144+ mid-write events, and reset keepalive interval on write completion.
145+ """
146+ @wraps (wrapped_method )
147+ def wrapper (self , * args , ** kwargs ):
148+ with self ._output_lock :
149+ ret = wrapped_method (self , * args , ** kwargs )
150+ # signal the keepalive thread last, so the timeout restarts after the last write, not before the first
151+ self ._output_event .set ()
152+ return ret
153+
154+ return wrapper
155+
84156 def update_paths (self , kwargs ):
85157 if kwargs .get ('envvars' ):
86158 if 'ANSIBLE_ROLES_PATH' in kwargs ['envvars' ]:
@@ -93,63 +165,72 @@ def update_paths(self, kwargs):
93165 return kwargs
94166
95167 def run (self ):
96- while True :
97- try :
98- line = self ._input .readline ()
99- data = json .loads (line )
100- except (json .decoder .JSONDecodeError , IOError ):
101- self .status_handler ({'status' : 'error' , 'job_explanation' : 'Failed to JSON parse a line from transmit stream.' }, None )
102- self .finished_callback (None ) # send eof line
103- return self .status , self .rc
104-
105- if 'kwargs' in data :
106- self .job_kwargs = self .update_paths (data ['kwargs' ])
107- elif 'zipfile' in data :
168+ self ._begin_keepalive ()
169+ try :
170+ while True :
108171 try :
109- unstream_dir (self ._input , data ['zipfile' ], self .private_data_dir )
110- except Exception :
111- self .status_handler ({
112- 'status' : 'error' ,
113- 'job_explanation' : 'Failed to extract private data directory on worker.' ,
114- 'result_traceback' : traceback .format_exc ()
115- }, None )
172+ line = self ._input .readline ()
173+ data = json .loads (line )
174+ except (json .decoder .JSONDecodeError , IOError ):
175+ self .status_handler ({'status' : 'error' , 'job_explanation' : 'Failed to JSON parse a line from transmit stream.' }, None )
116176 self .finished_callback (None ) # send eof line
117177 return self .status , self .rc
118- elif 'eof' in data :
119- break
120178
121- self .kwargs .update (self .job_kwargs )
122- self .kwargs ['quiet' ] = True
123- self .kwargs ['suppress_ansible_output' ] = True
124- self .kwargs ['private_data_dir' ] = self .private_data_dir
125- self .kwargs ['status_handler' ] = self .status_handler
126- self .kwargs ['event_handler' ] = self .event_handler
127- self .kwargs ['artifacts_handler' ] = self .artifacts_handler
128- self .kwargs ['finished_callback' ] = self .finished_callback
129-
130- r = ansible_runner .interface .run (** self .kwargs )
131- self .status , self .rc = r .status , r .rc
132-
133- # FIXME: do cleanup on the tempdir
179+ if 'kwargs' in data :
180+ self .job_kwargs = self .update_paths (data ['kwargs' ])
181+ elif 'zipfile' in data :
182+ try :
183+ unstream_dir (self ._input , data ['zipfile' ], self .private_data_dir )
184+ except Exception :
185+ self .status_handler ({
186+ 'status' : 'error' ,
187+ 'job_explanation' : 'Failed to extract private data directory on worker.' ,
188+ 'result_traceback' : traceback .format_exc ()
189+ }, None )
190+ self .finished_callback (None ) # send eof line
191+ return self .status , self .rc
192+ elif 'eof' in data :
193+ break
194+
195+ self .kwargs .update (self .job_kwargs )
196+ self .kwargs ['quiet' ] = True
197+ self .kwargs ['suppress_ansible_output' ] = True
198+ self .kwargs ['private_data_dir' ] = self .private_data_dir
199+ self .kwargs ['status_handler' ] = self .status_handler
200+ self .kwargs ['event_handler' ] = self .event_handler
201+ self .kwargs ['artifacts_handler' ] = self .artifacts_handler
202+ self .kwargs ['finished_callback' ] = self .finished_callback
203+
204+ r = ansible_runner .interface .run (** self .kwargs )
205+ self .status , self .rc = r .status , r .rc
206+
207+ # FIXME: do cleanup on the tempdir
208+ finally :
209+ self ._end_keepalive ()
134210
135211 return self .status , self .rc
136212
213+ @_synchronize_output_reset_keepalive
137214 def status_handler (self , status_data , runner_config ):
138215 self .status = status_data ['status' ]
139216 self ._output .write (json .dumps (status_data ).encode ('utf-8' ))
140217 self ._output .write (b'\n ' )
141218 self ._output .flush ()
142219
220+ @_synchronize_output_reset_keepalive
143221 def event_handler (self , event_data ):
144222 self ._output .write (json .dumps (event_data ).encode ('utf-8' ))
145223 self ._output .write (b'\n ' )
146224 self ._output .flush ()
147225
226+ @_synchronize_output_reset_keepalive
148227 def artifacts_handler (self , artifact_dir ):
149228 stream_dir (artifact_dir , self ._output )
150229 self ._output .flush ()
151230
231+ @_synchronize_output_reset_keepalive
152232 def finished_callback (self , runner_obj ):
233+ self ._end_keepalive () # ensure that we can't splat a keepalive event after the eof event
153234 self ._output .write (json .dumps ({'eof' : True }).encode ('utf-8' ))
154235 self ._output .write (b'\n ' )
155236 self ._output .flush ()
@@ -210,10 +291,18 @@ def status_callback(self, status_data):
210291 self .status_handler (status_data , runner_config = self .config )
211292
212293 def event_callback (self , event_data ):
294+ # FIXME: this needs to be more defensive to not blow up on "malformed" events or new values it doesn't recognize
295+ counter = event_data .get ('counter' )
296+ uuid = event_data .get ('uuid' )
297+
298+ if not counter or not uuid :
299+ # FIXME: log a warning about a malformed event?
300+ return
301+
213302 full_filename = os .path .join (self .artifact_dir ,
214303 'job_events' ,
215- '{ }-{}.json'. format ( event_data [ 'counter' ],
216- event_data [ 'uuid' ]))
304+ f' { counter } -{ uuid } .json')
305+
217306 if not self .quiet and 'stdout' in event_data :
218307 print (event_data ['stdout' ])
219308
@@ -254,6 +343,9 @@ def run(self):
254343 self .artifacts_callback (data )
255344 elif 'eof' in data :
256345 break
346+ elif data .get ('event' ) == 'keepalive' :
347+ # just ignore keepalives
348+ continue
257349 else :
258350 self .event_callback (data )
259351
0 commit comments