Skip to content

Commit b76dc76

Browse files
committed
Suspend execution feature
This commit introduces the suspend execution feature to the nrunner. The suspend execution was available in the legacy runner, but it was never ported to the nrunner. With this feature, it is possible to pause the execution of Python-based tasks on the process spawner by sending the SIGTSTP signal (ctrl+z). It is helpful for debugging test execution. Reference: #6059 Signed-off-by: Jan Richter <[email protected]>
1 parent 92890bd commit b76dc76

File tree

9 files changed

+183
-9
lines changed

9 files changed

+183
-9
lines changed

avocado/core/nrunner/runner.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import abc
22
import multiprocessing
3+
import os
34
import signal
45
import time
56
import traceback
@@ -8,6 +9,7 @@
89
from avocado.core.nrunner.runnable import RUNNERS_REGISTRY_STANDALONE_EXECUTABLE
910
from avocado.core.plugin_interfaces import RunnableRunner
1011
from avocado.core.utils import messages
12+
from avocado.utils import process
1113

1214
#: The amount of time (in seconds) between each internal status check
1315
RUNNER_RUN_CHECK_INTERVAL = 0.01
@@ -99,16 +101,37 @@ class PythonBaseRunner(BaseRunner, abc.ABC):
99101
Base class for Python runners
100102
"""
101103

102-
@staticmethod
103-
def signal_handler(signum, frame): # pylint: disable=W0613
104+
def __init__(self):
    """Initialize the state used by the pause/resume (SIGTSTP) support."""
    super().__init__()
    # Handle of the worker process; run() assigns a multiprocessing.Process.
    self.proc = None
    # True while the process tree is stopped; toggled by pause_process().
    self.process_stopped = False
    # Set by signal_handler() on SIGTSTP and consumed by _monitor().
    self.stop_signal = False
109+
110+
def signal_handler(self, signum, frame):  # pylint: disable=W0613
    """React to the signals installed by :meth:`run`.

    :param signum: number of the received signal.
    :param frame: current stack frame (unused).
    :raises TestInterrupt: when SIGTERM is received.
    """
    # SIGTERM is checked first so that SIGTSTP is only looked up for
    # other signals.
    if signum == signal.SIGTERM.value:
        raise TestInterrupt("Test interrupted: Timeout reached")
    if signum == signal.SIGTSTP.value:
        # Only record the request here; _monitor() performs the actual
        # pause/resume outside of the signal handler.
        self.stop_signal = True
106115

107-
@staticmethod
108-
def _monitor(queue):
116+
def pause_process(self):
    """Toggle the worker process tree between stopped and running.

    The first call sends SIGSTOP to the worker process and all of its
    (recursive) children; the next call sends SIGCONT, and so on.

    Processes that exit between the moment the PIDs are collected and
    the moment the signal is delivered are ignored, so a short lived
    child can not abort the monitoring loop.
    """
    # Nothing to signal if the worker process was never started.
    if self.proc is None or self.proc.pid is None:
        return
    if self.process_stopped:
        self.process_stopped = False
        sign = signal.SIGCONT
    else:
        self.process_stopped = True
        sign = signal.SIGSTOP
    processes = process.get_children_pids(self.proc.pid, recursive=True)
    processes.append(self.proc.pid)
    for pid in processes:
        try:
            os.kill(pid, sign)
        except ProcessLookupError:
            # The process is already gone; there is nothing to pause
            # or resume.
            continue
127+
128+
def _monitor(self, queue):
109129
most_recent_status_time = None
110130
while True:
111131
time.sleep(RUNNER_RUN_CHECK_INTERVAL)
132+
if self.stop_signal:
133+
self.stop_signal = False
134+
self.pause_process()
112135
if queue.empty():
113136
now = time.monotonic()
114137
if (
@@ -126,23 +149,26 @@ def _monitor(queue):
126149
break
127150

128151
def run(self, runnable):
129-
# pylint: disable=W0201
152+
if hasattr(signal, "SIGTSTP"):
153+
signal.signal(signal.SIGTSTP, signal.SIG_IGN)
154+
signal.signal(signal.SIGTSTP, self.signal_handler)
130155
signal.signal(signal.SIGTERM, self.signal_handler)
156+
# pylint: disable=W0201
131157
self.runnable = runnable
132158
yield messages.StartedMessage.get()
133159
try:
134160
queue = multiprocessing.SimpleQueue()
135-
process = multiprocessing.Process(
161+
self.proc = multiprocessing.Process(
136162
target=self._run, args=(self.runnable, queue)
137163
)
138164

139-
process.start()
165+
self.proc.start()
140166

141167
for message in self._monitor(queue):
142168
yield message
143169

144170
except TestInterrupt:
145-
process.terminate()
171+
self.proc.terminate()
146172
for message in self._monitor(queue):
147173
yield message
148174
except Exception as e:

avocado/core/plugin_interfaces.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,28 @@ async def terminate_task(self, runtime_task):
376376
:rtype: bool
377377
"""
378378

379+
async def stop_task(self, runtime_task):
    """Pause a task that has already been spawned.

    Spawners that do not override this method do not support pausing.

    :param runtime_task: wrapper for a Task with additional runtime
                         information.
    :type runtime_task: :class:`avocado.core.task.runtime.RuntimeTask`
    :returns: whether the task has been stopped or not.
    :rtype: bool
    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
389+
390+
async def resume_task(self, runtime_task):
    """Resume a task that was previously stopped.

    Spawners that do not override this method do not support resuming.

    :param runtime_task: wrapper for a Task with additional runtime
                         information.
    :type runtime_task: :class:`avocado.core.task.runtime.RuntimeTask`
    :returns: whether the task has been resumed or not.
    :rtype: bool
    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
400+
379401
@staticmethod
380402
@abc.abstractmethod
381403
async def check_task_requirements(runtime_task):

avocado/core/task/runtime.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class RuntimeTaskStatus(Enum):
1818
FAIL_TRIAGE = "FINISHED WITH FAILURE ON TRIAGE"
1919
FAIL_START = "FINISHED FAILING TO START"
2020
STARTED = "STARTED"
21+
PAUSED = "PAUSED"
2122

2223
@staticmethod
2324
def finished_statuses():

avocado/core/task/statemachine.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import time
66

77
from avocado.core.exceptions import JobFailFast
8+
from avocado.core.output import LOG_UI
89
from avocado.core.task.runtime import RuntimeTaskStatus
910
from avocado.core.teststatus import STATUSES_NOT_OK
1011
from avocado.core.utils import messages
@@ -493,6 +494,31 @@ async def terminate_tasks_interrupted(self):
493494
terminated = await self._terminate_tasks(task_status)
494495
await self._send_finished_tasks_message(terminated, "Interrupted by user")
495496

497+
@staticmethod
498+
async def stop_resume_tasks(state_machine, spawner):
499+
async with state_machine.lock:
500+
try:
501+
for runtime_task in state_machine.monitored:
502+
if runtime_task.status == RuntimeTaskStatus.STARTED:
503+
await spawner.stop_task(runtime_task)
504+
runtime_task.status = RuntimeTaskStatus.PAUSED
505+
LOG_UI.warning(
506+
f"{runtime_task.task.identifier}: {runtime_task.status.value}"
507+
)
508+
elif runtime_task.status == RuntimeTaskStatus.PAUSED:
509+
await spawner.resume_task(runtime_task)
510+
runtime_task.status = RuntimeTaskStatus.STARTED
511+
LOG_UI.warning(
512+
f"{runtime_task.task.identifier}: {runtime_task.status.value}"
513+
)
514+
except NotImplementedError:
515+
LOG.warning(
516+
f"Sending signals to tasks is not implemented for spawner: {spawner}"
517+
)
518+
LOG_UI.warning(
519+
f"Sending signals to tasks is not implemented for spawner: {spawner}"
520+
)
521+
496522
async def run(self):
497523
"""Pushes Tasks forward and makes them do something with their lives."""
498524
while True:

avocado/plugins/runner_nrunner.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
import os
2222
import platform
2323
import random
24+
import signal
2425
import tempfile
26+
import threading
2527

2628
from avocado.core.dispatcher import SpawnerDispatcher
2729
from avocado.core.exceptions import JobError, JobFailFast
@@ -269,6 +271,10 @@ def _abort_if_missing_runners(runnables):
269271
)
270272
raise JobError(msg)
271273

274+
@staticmethod
275+
def signal_handler(spawner, state_machine):
276+
asyncio.create_task(Worker.stop_resume_tasks(state_machine, spawner))
277+
272278
def run_suite(self, job, test_suite):
273279
summary = set()
274280

@@ -335,6 +341,14 @@ def run_suite(self, job, test_suite):
335341
]
336342
asyncio.ensure_future(self._update_status(job))
337343
loop = asyncio.get_event_loop()
344+
if (
345+
hasattr(signal, "SIGTSTP")
346+
and threading.current_thread() is threading.main_thread()
347+
):
348+
loop.add_signal_handler(
349+
signal.SIGTSTP,
350+
lambda: self.signal_handler(spawner, self.tsm),
351+
)
338352
try:
339353
try:
340354
loop.run_until_complete(

avocado/plugins/spawners/process.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
import os
3+
import signal
34
import socket
45

56
from avocado.core.dependencies.requirements import cache
@@ -109,6 +110,16 @@ async def terminate_task(self, runtime_task):
109110
pass
110111
return returncode is not None
111112

113+
async def stop_task(self, runtime_task):
    """Send SIGTSTP to the process of an already spawned task.

    :param runtime_task: wrapper for a Task with additional runtime
                         information.
    :returns: True when the signal was sent, False when the process no
              longer exists.
    :rtype: bool
    """
    try:
        runtime_task.spawner_handle.process.send_signal(signal.SIGTSTP)
    except ProcessLookupError:
        return False
    # Report success explicitly instead of falling through with None.
    return True
119+
120+
async def resume_task(self, runtime_task):
    """Resume a stopped task.

    This delegates to :meth:`stop_task`, so resuming sends the same
    signal as stopping.

    :param runtime_task: wrapper for a Task with additional runtime
                         information.
    :returns: the result reported by :meth:`stop_task`.
    """
    # Propagate the outcome instead of silently dropping it.
    return await self.stop_task(runtime_task)
122+
112123
@staticmethod
113124
async def check_task_requirements(runtime_task):
114125
"""Check the runtime task requirements needed to be able to run"""

docs/source/guides/contributor/chapters/tips.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,27 @@ During the execution look for::
3333

3434
avocado --show avocado.utils.debug run examples/tests/assets.py
3535

36+
Interrupting test
37+
-----------------
38+
39+
In case you want to "pause" the running test, you can use SIGTSTP (ctrl+z)
40+
signal sent to the main avocado process. This signal is forwarded to the test
41+
and its child processes. To resume testing, send the same signal again.
42+
43+
.. note::
44+
The job and test timeouts are still enabled on stopped processes. This
45+
means that after you resume, the test can be killed by the timeout if
46+
the timeout was reached. You can pass `-p timeout_factor=$int$` to `avocado run` to
47+
increase the timeouts for your debugging.
48+
49+
.. note::
50+
It is supported on the process spawner only.
51+
52+
.. warning::
53+
This feature is meant only for debugging purposes and it can
54+
cause unreliable behavior especially if the signal is sent during the
55+
test initialization. Therefore use it with caution.
56+
3657
Line-profiler
3758
-------------
3859

selftests/check.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
"unit": 661,
3131
"jobs": 11,
3232
"functional-parallel": 314,
33-
"functional-serial": 7,
33+
"functional-serial": 8,
3434
"optional-plugins": 0,
3535
"optional-plugins-golang": 2,
3636
"optional-plugins-html": 3,
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import os
2+
import re
3+
import signal
4+
import time
5+
6+
from avocado.utils import process, script
7+
from selftests.utils import AVOCADO, TestCaseTmpDir
8+
9+
SLEEP_TEST = """import time
10+
11+
from avocado import Test
12+
13+
14+
class SleepTest(Test):
15+
16+
def test(self):
17+
self.log.debug("Sleeping starts: %s", time.time())
18+
time.sleep(5)
19+
self.log.debug("Sleeping ends: %s", time.time())
20+
"""
21+
22+
23+
class RunnerOperationTest(TestCaseTmpDir):
    """Functional test of pausing and resuming a running job with SIGTSTP."""

    #: Maximum time (in seconds) to wait for the job to report a started test
    STARTUP_TIMEOUT = 60

    #: Interval (in seconds) between checks of the job output during startup
    STARTUP_POLL_INTERVAL = 0.1

    def test_pause(self):
        """Pause a sleeping test for 10 seconds and check that it was delayed.

        The job is started, SIGTSTP is sent once the test is reported as
        started, and SIGTSTP is sent again after 10 seconds. The job log
        must then contain both status changes, and the time between the
        two log lines written by the test must be at least 10 seconds.
        """
        with script.TemporaryScript(
            "sleep.py",
            SLEEP_TEST,
        ) as tst:
            cmd_line = f"{AVOCADO} run --disable-sysinfo --job-results-dir {self.tmpdir.name} -- {tst}"
            proc = process.SubProcess(cmd_line)
            proc.start()
            # Wait for the test to start, but never forever: a job that
            # fails before printing "STARTED" must fail this test instead
            # of hanging it.
            deadline = time.monotonic() + self.STARTUP_TIMEOUT
            while b"STARTED" not in proc.get_stdout():
                if time.monotonic() > deadline:
                    proc.send_signal(signal.SIGTERM)
                    self.fail(
                        f"Job did not start a test within {self.STARTUP_TIMEOUT} seconds"
                    )
                time.sleep(self.STARTUP_POLL_INTERVAL)
            time.sleep(1)
            proc.send_signal(signal.SIGTSTP)
            time.sleep(10)
            proc.send_signal(signal.SIGTSTP)
            proc.wait()
            full_log_path = os.path.join(self.tmpdir.name, "latest", "full.log")
            with open(full_log_path, encoding="utf-8") as full_log_file:
                full_log = full_log_file.read()
            self.assertIn("SleepTest.test: PAUSED", full_log)
            self.assertIn("SleepTest.test: STARTED", full_log)
            self.assertIn("Sleeping starts:", full_log)
            self.assertIn("Sleeping ends:", full_log)
            regex_start = re.search("Sleeping starts: ([0-9]*)", full_log)
            regex_end = re.search("Sleeping ends: ([0-9]*)", full_log)
            # Fail with a clear message rather than an AttributeError when
            # a timestamp line can not be parsed.
            self.assertIsNotNone(regex_start)
            self.assertIsNotNone(regex_end)
            start_time = int(regex_start.group(1))
            end_time = int(regex_end.group(1))
            self.assertGreaterEqual(end_time - start_time, 10)

0 commit comments

Comments
 (0)