Skip to content

Commit

Permalink
feat: support starting and stopping Pebble checks, and the checks enabled field (canonical#1560)
Browse files Browse the repository at this point in the history

Add support for:

* The `startup` field in Pebble checks.
* The `start_checks` and `stop_checks` Pebble API calls.

Harness (and Scenario, mostly via re-using the Harness implementation)
is adjusted to more closely simulate the Changes implementation of
Pebble Checks, so that the 'if the change ID is the empty string, the
check is inactive' behaviour can be simulated.

A subtle bug with notices and check-infos is also fixed: previously the
mocked Pebble would gather all notices and check-infos from all
containers in the state, instead of only those that are in the correct
container.

[Pebble PR](canonical/pebble#560)
  • Loading branch information
tonyandrewmeyer authored Feb 18, 2025
1 parent a83ffef commit d54a26a
Show file tree
Hide file tree
Showing 11 changed files with 523 additions and 8 deletions.
87 changes: 86 additions & 1 deletion ops/_private/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -3175,7 +3175,42 @@ def autostart_services(self, timeout: float = 30.0, delay: float = 0.1):
if startup == pebble.ServiceStartup.ENABLED:
self._service_status[name] = pebble.ServiceStatus.ACTIVE

def _new_perform_check(self, info: pebble.CheckInfo) -> pebble.Change:
    """Create an in-progress perform-check change and attach it to ``info``.

    The new change is recorded in ``self._changes``. ``info`` is updated in
    place: it is pointed at the new change's ID, its status is set to UP and
    its failure count is reset to zero.

    Args:
        info: The check info to mark as running.

    Returns:
        The newly created change.
    """
    spawned_at = datetime.datetime.now()
    change_id = pebble.ChangeID(str(uuid.uuid4()))
    perform_check = pebble.Change(
        change_id,
        pebble.ChangeKind.PERFORM_CHECK.value,
        summary=info.name,
        status=pebble.ChangeStatus.DOING.value,
        tasks=[],
        ready=False,
        err=None,
        spawn_time=spawned_at,
        ready_time=None,
    )
    self._changes[perform_check.id] = perform_check
    # The start/stop logic treats a non-empty change ID as "check is running".
    info.change_id = perform_check.id
    info.status = pebble.CheckStatus.UP
    info.failures = 0
    return perform_check

def replan_services(self, timeout: float = 30.0, delay: float = 0.1):
    """Start any startup-enabled checks that are not running, then autostart services.

    Checks whose startup mode is DISABLED are skipped. For every other
    rendered check, a ``CheckInfo`` is created if none exists yet (with a
    threshold of 3 when the check does not set one), and a new perform-check
    change is created if the check has no change ID.

    Args:
        timeout: Passed through to ``autostart_services``.
        delay: Passed through to ``autostart_services``.

    Returns:
        Whatever ``autostart_services`` returns.
    """
    for check_name, check in self._render_checks().items():
        if check.startup != pebble.CheckStartup.DISABLED:
            check_info = self._check_infos.get(check_name)
            if check_info is None:
                threshold = check.threshold if check.threshold is not None else 3
                check_info = pebble.CheckInfo(
                    name=check_name,
                    level=check.level,
                    status=pebble.CheckStatus.UP,
                    failures=0,
                    threshold=threshold,
                    startup=check.startup,
                )
                self._check_infos[check_name] = check_info
            # An empty change ID means the check is not currently running.
            if not check_info.change_id:
                self._new_perform_check(check_info)
    return self.autostart_services(timeout, delay)

def start_services(
Expand Down Expand Up @@ -3346,12 +3381,35 @@ def add_layer(
else:
self._layers[label] = layer_obj

# Checks are started when the layer is added, not (only) on replan.
for name, check in layer_obj.checks.items():
try:
info = self._check_infos[name]
except KeyError:
status = (
pebble.CheckStatus.INACTIVE
if check.startup == pebble.CheckStartup.DISABLED
else pebble.CheckStatus.UP
)
info = pebble.CheckInfo(
name,
level=check.level,
status=status,
failures=0,
change_id=pebble.ChangeID(''),
)
self._check_infos[name] = info
info.level = check.level
info.threshold = 3 if check.threshold is None else check.threshold
info.startup = check.startup
if info.startup != pebble.CheckStartup.DISABLED and not info.change_id:
self._new_perform_check(info)

def _render_services(self) -> Dict[str, pebble.Service]:
    """Flatten the services from all layers into a single name-to-service mapping.

    Layers are visited in sorted label order. When the same service name
    appears in more than one layer, the definition from the later label
    replaces the earlier one outright; the definitions are not merged.
    """
    # TODO: merge existing services https://github.com/canonical/operator/issues/1112
    return {
        service_name: service
        for label in sorted(self._layers)
        for service_name, service in self._layers[label].services.items()
    }

Expand Down Expand Up @@ -3743,6 +3801,33 @@ def get_checks(
if (level is None or level == info.level) and (names is None or info.name in names)
]

def start_checks(self, names: List[str]) -> List[str]:
    """Start the named checks and return the names of those actually started.

    Every name is validated before any check is touched, so an unknown name
    leaves all check state unchanged instead of starting the checks listed
    ahead of it and then failing part-way through.

    Args:
        names: Names of the checks to start.

    Returns:
        The names, in the order given, of the checks that were not already
        running and were started by this call.

    Raises:
        The error built by ``self._api_error`` with code 404 if any name
        does not correspond to a known check.
    """
    self._check_connection()
    # Materialise once so that a one-shot iterable survives both passes.
    names = list(names)
    for name in names:
        if name not in self._check_infos:
            raise self._api_error(404, f'cannot find check with name "{name}"')
    started: List[str] = []
    for name in names:
        info = self._check_infos[name]
        # An empty change ID means the check is not currently running.
        if not info.change_id:
            self._new_perform_check(info)
            started.append(name)
    return started

def stop_checks(self, names: List[str]) -> List[str]:
    """Stop the named checks and return the names of those actually stopped.

    Every name is validated before any check is touched, so an unknown name
    leaves all check state unchanged instead of stopping the checks listed
    ahead of it and then failing part-way through.

    Args:
        names: Names of the checks to stop.

    Returns:
        The names, in the order given, of the checks that were running and
        were stopped by this call.

    Raises:
        The error built by ``self._api_error`` with code 404 if any name
        does not correspond to a known check.
    """
    self._check_connection()
    # Materialise once so that a one-shot iterable survives both passes.
    names = list(names)
    for name in names:
        if name not in self._check_infos:
            raise self._api_error(404, f'cannot find check with name "{name}"')
    stopped: List[str] = []
    for name in names:
        info = self._check_infos[name]
        # Only a check with a non-empty change ID is running.
        if info.change_id:
            change = self._changes[info.change_id]
            change.status = pebble.ChangeStatus.ABORT.value
            info.status = pebble.CheckStatus.INACTIVE
            # Clearing the change ID is what marks the check as inactive.
            info.change_id = pebble.ChangeID('')
            stopped.append(name)
    return stopped

def notify(
self,
type: pebble.NoticeType,
Expand Down
24 changes: 24 additions & 0 deletions ops/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2503,6 +2503,30 @@ def get_check(self, check_name: str) -> pebble.CheckInfo:
raise RuntimeError(f'expected 1 check, got {len(checks)}')
return checks[check_name]

def start_checks(self, *check_names: str) -> List[str]:
    """Start the named check(s).

    Args:
        check_names: One or more names of checks to start.

    Returns:
        A list of the check names that were started. Checks that were
        already running are not included.

    Raises:
        TypeError: If no check names are given.
    """
    if check_names:
        return self._pebble.start_checks(check_names)
    raise TypeError('start-checks expected at least 1 argument, got 0')

def stop_checks(self, *check_names: str) -> List[str]:
    """Stop the named check(s).

    Args:
        check_names: One or more names of checks to stop.

    Returns:
        A list of the check names that were stopped. Checks that were
        already inactive are not included.

    Raises:
        TypeError: If no check names are given.
    """
    if check_names:
        return self._pebble.stop_checks(check_names)
    raise TypeError('stop-checks expected at least 1 argument, got 0')

@typing.overload
def pull(self, path: Union[str, PurePath], *, encoding: None) -> BinaryIO: ...

Expand Down
76 changes: 73 additions & 3 deletions ops/pebble.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
{
'override': str,
'level': Union['CheckLevel', str],
'startup': Literal['', 'enabled', 'disabled'],
'period': Optional[str],
'timeout': Optional[str],
'http': Optional[HttpDict],
Expand Down Expand Up @@ -243,6 +244,7 @@ def __enter__(self) -> typing.IO[typing.AnyStr]: ...
{
'name': str,
'level': NotRequired[str],
'startup': NotRequired[Literal['enabled', 'disabled']],
'status': str,
'failures': NotRequired[int],
'threshold': int,
Expand Down Expand Up @@ -1103,6 +1105,7 @@ def __init__(self, name: str, raw: Optional[CheckDict] = None):
except ValueError:
level = dct.get('level', '')
self.level = level
self.startup = CheckStartup(dct.get('startup', ''))
self.period: Optional[str] = dct.get('period', '')
self.timeout: Optional[str] = dct.get('timeout', '')
self.threshold: Optional[int] = dct.get('threshold')
Expand All @@ -1128,6 +1131,7 @@ def to_dict(self) -> CheckDict:
fields = [
('override', self.override),
('level', level),
('startup', self.startup.value),
('period', self.period),
('timeout', self.timeout),
('threshold', self.threshold),
Expand Down Expand Up @@ -1231,6 +1235,15 @@ class CheckStatus(enum.Enum):

UP = 'up'
DOWN = 'down'
INACTIVE = 'inactive'


class CheckStartup(enum.Enum):
    """Enum of check startup options.

    The member values are the strings used for a check's ``startup`` field.
    """

    # Empty string: no startup mode given. Note that this is treated as ENABLED.
    UNSET = ''
    ENABLED = 'enabled'
    DISABLED = 'disabled'


class LogTarget:
Expand Down Expand Up @@ -1407,12 +1420,21 @@ class CheckInfo:
This can be :attr:`CheckLevel.ALIVE`, :attr:`CheckLevel.READY`, or None (level not set).
"""

startup: CheckStartup
"""Startup mode.
:attr:`CheckStartup.ENABLED` means the check will be started when added, and
in a replan. :attr:`CheckStartup.DISABLED` means the check must be manually
started.
"""

status: Union[CheckStatus, str]
"""Status of the check.
:attr:`CheckStatus.UP` means the check is healthy (the number of failures
is less than the threshold), :attr:`CheckStatus.DOWN` means the check is
unhealthy (the number of failures has reached the threshold).
unhealthy (the number of failures has reached the threshold), and
:attr:`CheckStatus.INACTIVE` means the check is not running.
"""

failures: int
Expand Down Expand Up @@ -1442,9 +1464,11 @@ def __init__(
failures: int = 0,
threshold: int = 0,
change_id: Optional[ChangeID] = None,
startup: CheckStartup = CheckStartup.ENABLED,
):
self.name = name
self.level = level
self.startup = startup
self.status = status
self.failures = failures
self.threshold = threshold
Expand All @@ -1461,20 +1485,27 @@ def from_dict(cls, d: _CheckInfoDict) -> CheckInfo:
status = CheckStatus(d['status'])
except ValueError:
status = d['status']
change_id = ChangeID(d['change-id']) if 'change-id' in d else None
if not change_id and 'startup' in d:
# This is a version of Pebble that supports stopping checks, which
# means that the check is inactive if it has no change ID.
status = CheckStatus.INACTIVE
return cls(
name=d['name'],
level=level,
startup=CheckStartup(d.get('startup', 'enabled')),
status=status,
failures=d.get('failures', 0),
threshold=d['threshold'],
change_id=ChangeID(d['change-id']) if 'change-id' in d else None,
change_id=change_id,
)

def __repr__(self):
return (
'CheckInfo('
f'name={self.name!r}, '
f'level={self.level}, '
f'startup={self.startup}, '
f'status={self.status}, '
f'failures={self.failures}, '
f'threshold={self.threshold!r}, '
Expand Down Expand Up @@ -2126,7 +2157,9 @@ def autostart_services(self, timeout: float = 30.0, delay: float = 0.1) -> Chang
return self._services_action('autostart', [], timeout, delay)

def replan_services(self, timeout: float = 30.0, delay: float = 0.1) -> ChangeID:
"""Replan by (re)starting changed and startup-enabled services and wait for them to start.
"""Replan by (re)starting changed and startup-enabled services and checks.
After requesting the replan, also wait for any impacted services to start.
Args:
timeout: Seconds before replan change is considered timed out (float). If
Expand Down Expand Up @@ -2335,6 +2368,19 @@ def _wait_change_using_polling(

raise TimeoutError(f'timed out waiting for change {change_id} ({timeout} seconds)')

def _checks_action(self, action: str, checks: Iterable[str]) -> List[str]:
    """POST a check action to ``/v1/checks`` and return the names it changed.

    Args:
        action: The action name to send in the request body.
        checks: Iterable of check names the action applies to. A bare
            string is rejected rather than treated as a sequence of
            single-character names.

    Returns:
        The ``changed`` list from the ``result`` of the response.

    Raises:
        TypeError: If ``checks`` is a string or not iterable, or if any
            check name is not a string.
    """
    if isinstance(checks, str) or not hasattr(checks, '__iter__'):
        raise TypeError(f'checks must be of type Iterable[str], not {type(checks).__name__}')

    checks = tuple(checks)
    # Report the first offending element, in the order the names were given.
    wrong_types = [type(chk).__name__ for chk in checks if not isinstance(chk, str)]
    if wrong_types:
        raise TypeError(f'check names must be str, not {wrong_types[0]}')

    payload = {'action': action, 'checks': checks}
    response = self._request('POST', '/v1/checks', body=payload)
    return response['result']['changed']

def add_layer(self, label: str, layer: Union[str, LayerDict, Layer], *, combine: bool = False):
"""Dynamically add a new layer onto the Pebble configuration layers.
Expand Down Expand Up @@ -3052,6 +3098,30 @@ def get_checks(
resp = self._request('GET', '/v1/checks', query)
return [CheckInfo.from_dict(info) for info in resp['result']]

def start_checks(self, checks: Iterable[str]) -> List[str]:
    """Start checks by name.

    Args:
        checks: Non-empty iterable of names of the checks to start.

    Returns:
        List of check names that were started. Checks that were already
        running will not be included.

    Raises:
        TypeError: If ``checks`` is a string or not iterable, or if any
            check name is not a string.
    """
    return self._checks_action('start', checks)

def stop_checks(self, checks: Iterable[str]) -> List[str]:
    """Stop checks by name.

    Args:
        checks: Non-empty iterable of names of the checks to stop.

    Returns:
        List of check names that were stopped. Checks that were already
        inactive will not be included.

    Raises:
        TypeError: If ``checks`` is a string or not iterable, or if any
            check name is not a string.
    """
    return self._checks_action('stop', checks)

def notify(
self,
type: NoticeType,
Expand Down
1 change: 1 addition & 0 deletions test/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ def mock_check_info(
'status': 'down',
'failures': 3,
'threshold': 3,
'change-id': '1',
})
]

Expand Down
5 changes: 5 additions & 0 deletions test/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1885,13 +1885,15 @@ def test_get_checks(self, container: ops.Container):
'status': 'up',
'failures': 0,
'threshold': 3,
'change-id': '1',
}),
pebble.CheckInfo.from_dict({
'name': 'c2',
'level': 'alive',
'status': 'down',
'failures': 2,
'threshold': 2,
'change-id': '2',
}),
]

Expand Down Expand Up @@ -1931,6 +1933,7 @@ def test_get_check(self, container: ops.Container):
'status': 'up',
'failures': 0,
'threshold': 3,
'change-id': '1',
})
])
c = container.get_check('c1')
Expand All @@ -1954,13 +1957,15 @@ def test_get_check(self, container: ops.Container):
'status': 'up',
'failures': 0,
'threshold': 3,
'change-id': '1',
}),
pebble.CheckInfo.from_dict({
'name': 'c2',
'level': 'alive',
'status': 'down',
'failures': 2,
'threshold': 2,
'change-id': '2',
}),
])
with pytest.raises(RuntimeError):
Expand Down
Loading

0 comments on commit d54a26a

Please sign in to comment.