2121"""
2222
2323import asyncio
24+ from contextlib import suppress
2425import json
2526import os
2627import re
27- import sys
2828import signal
2929import psutil
30- import functools
3130
3231from pathlib import Path
3332from dataclasses import dataclass
@@ -53,20 +52,25 @@ class Process:
5352 cgroup_version : int
5453
5554
async def stop_profiler(process, comms_timeout, tasks, *_args):
    """Stop the profiler and return its data to the scheduler.

    This function will be executed when the profiler receives a stop signal.

    Args:
        process: Profiled process record, passed to ``get_profiler_data``.
        comms_timeout: Forwarded to ``record_messages`` as ``comms_timeout``.
        tasks: The profiler's asyncio tasks; every one of them is cancelled
            before this coroutine returns.
        *_args: Ignored.

    Raises:
        KeyError: If ``CYLC_WORKFLOW_ID`` or ``CYLC_TASK_JOB`` is not set in
            the environment.
    """
    try:
        # extract the stats
        profiler_data = get_profiler_data(process)

        # send a task message to the scheduler / write message to the
        # job.status file
        await record_messages(
            os.environ['CYLC_WORKFLOW_ID'],
            os.environ['CYLC_TASK_JOB'],
            [['DEBUG', f'_cylc_profiler: {json.dumps(profiler_data)}']],
            comms_timeout=comms_timeout,
        )
    finally:
        # Stop the profiler only *after* the message has been recorded.
        # Cancelling these tasks ends the `gather` that keeps the event loop
        # alive, and `asyncio.run` cancels every task still pending at that
        # point - which would include this coroutine if it were still
        # awaiting `record_messages`.
        for task in tasks:
            task.cancel()
7074
7175
7276def get_profiler_data (process ):
@@ -219,7 +223,7 @@ async def profile(_process: Process, delay, keep_looping=lambda: True):
219223 while keep_looping ():
220224 # Polling the cgroup for memory and keeping track of the max rss value
221225 max_rss = parse_memory_file (_process )
222- if max_rss > _process .max_rss :
226+ if max_rss is not None and max_rss > _process .max_rss :
223227 _process .max_rss = max_rss
224228 await asyncio .sleep (delay )
225229
@@ -244,29 +248,36 @@ def get_option_parser() -> COP:
@cli_function(get_option_parser)
def main(_parser: COP, options) -> None:
    """CLI main.

    Run ``_main`` to completion on a fresh event loop.

    Args:
        _parser: The option parser (unused here).
        options: Parsed command line options, handed to ``_main``.
    """
    # SystemExit and CancelledError are the expected ways for the event loop
    # to end (neither derives from Exception, hence the explicit listing).
    # NOTE(review): `Exception` additionally hides every ordinary failure
    # raised by `_main`; presumably a deliberate best-effort so the profiler
    # never fails noisily - confirm, and consider logging instead.
    with suppress(SystemExit, asyncio.exceptions.CancelledError, Exception):
        asyncio.run(_main(options))
248253
249254
async def _main(options) -> None:
    """Run the profiler tasks until they finish or are cancelled.

    Args:
        options: Parsed command line options; ``cgroup_location``,
            ``comms_timeout`` and ``delay`` are read from it.
    """
    # get cgroup information
    process = get_cgroup_paths(options.cgroup_location)

    # list of asyncio tasks
    tasks = []

    # Strong references to in-flight stop handlers: the event loop only keeps
    # weak references to tasks, so an unreferenced task may be garbage
    # collected before it has finished.
    stop_tasks = set()

    def _on_stop_signal():
        """Schedule `stop_profiler` and keep hold of the resulting task."""
        stop_task = asyncio.create_task(
            stop_profiler(process, options.comms_timeout, tasks)
        )
        stop_tasks.add(stop_task)
        stop_task.add_done_callback(stop_tasks.discard)

    # Register the stop_profiler function with the signal library
    # The signal library doesn't work with asyncio, so we have to use the
    # loop's add_signal_handler function instead
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGHUP, signal.SIGTERM):
        loop.add_signal_handler(sig, _on_stop_signal)

    # the profiler will run until these tasks finish or are cancelled
    # (e.g. by `stop_profiler`):
    tasks.extend([
        # run the profiler itself
        asyncio.create_task(profile(process, options.delay)),

        # kill the profiler if its PPID changes
        # (i.e, if the job exits before the profiler does)
        asyncio.create_task(watch_and_kill(psutil.Process(os.getpid()))),
    ])
    try:
        await asyncio.gather(*tasks)
    finally:
        # Give any in-flight stop handler the chance to finish reporting:
        # once this coroutine returns, `asyncio.run` cancels all tasks that
        # are still pending.
        if stop_tasks:
            await asyncio.gather(*stop_tasks, return_exceptions=True)
0 commit comments