def handle_exec_error(message: str) -> None:
    """Report an unexpected container state and abort the current job.

    The message is logged as an error. If the
    ``exit_on_unexpected_container_states`` setting is enabled, ``SIGUSR2``
    is sent to the own process; the handler installed for that signal is
    expected to shut the program down. In either case an ``AssertionError``
    is raised afterwards, so the caller never goes on to execute a command
    in a container that just failed a sanity check.

    :param message: Description of the unexpected state.
    :raises AssertionError: Always, carrying ``message``.
    """
    log.error(message)
    if cfg.exit_on_unexpected_container_states:
        # The signal is only a request: it is processed asynchronously by the
        # signal handler, so this call returns and execution continues here.
        # NOTE(review): presumably jobs run in executor worker threads, where
        # raising SystemExit directly would not end the process - confirm.
        os.kill(os.getpid(), SIGUSR2)
    # Returning normally would let the calling job proceed despite the failed
    # check; raise to keep the job aborted as the plain assertions did before.
    raise AssertionError(message)
def sigusr2_handler(signum, frame):
    """Handle ``SIGUSR2``: log the event, then exit with status code 1.

    Both arguments are supplied by the signal machinery and are not used.
    """
    log.info("SIGUSR2 received, exiting with error.")
    # sys.exit raises SystemExit with the given status code.
    sys.exit(1)