Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions supervisor/apps/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
DockerBuildError,
DockerContainerPortConflict,
DockerError,
DockerNotFound,
DockerRegistryAuthError,
HostAppArmorError,
StoreAppNotFoundError,
Expand Down Expand Up @@ -258,14 +259,42 @@ async def load(self) -> None:

# Ensure we are using correct image for this system
await self.instance.check_image(self.version, default_image, self.arch)
except DockerError:
except DockerNotFound:
_LOGGER.info("No %s app Docker image %s found", self.slug, self.image)
with suppress(DockerError, AppNotSupportedError):
await self.instance.install(self.version, default_image, arch=self.arch)
if self.need_build:
# Don't run a local build during setup. Surface a repair so
# the resolution autofix loop can handle it off the critical
# path.
self._create_missing_image_issue()
else:
try:
await self.instance.install(
self.version, default_image, arch=self.arch
)
except (DockerError, AppNotSupportedError):
self._create_missing_image_issue()
except DockerError as err:
# Docker error other than a clean "image not found" - we can't
# tell whether the image is actually missing. Log so the issue
# is visible (CRITICAL is captured by the Sentry integration)
# and leave the app detached; the user can attempt a manual
# rebuild from the app page.
_LOGGER.critical(
"Docker error loading app %s, leaving detached: %s", self.slug, err
)

self.persist[ATTR_IMAGE] = default_image
await self.save_persist()

def _create_missing_image_issue(self) -> None:
"""Surface a repair suggestion for a missing app image."""
self.sys_resolution.create_issue(
IssueType.MISSING_IMAGE,
ContextType.ADDON,
reference=self.slug,
suggestions=[SuggestionType.EXECUTE_REPAIR],
)

@property
def ip_address(self) -> IPv4Address:
"""Return IP of app instance."""
Expand Down
17 changes: 13 additions & 4 deletions supervisor/docker/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,15 +448,20 @@ async def attach(
),
)

with suppress(aiodocker.DockerError):
if not self._meta and self.image:
if not self._meta and self.image:
try:
self._meta = await self.sys_docker.images.inspect(
f"{self.image}:{version!s}"
)
except aiodocker.DockerError as err:
if err.status != HTTPStatus.NOT_FOUND:
raise DockerAPIError(
f"Docker API error inspecting image {self.image}:{version!s}: {err!s}"
) from err

# Successful?
if not self._meta:
raise DockerError(
raise DockerNotFound(
f"Could not get metadata on container or image for {self.name}"
)
_LOGGER.info("Attaching to %s with version %s", self.image, self.version)
Expand Down Expand Up @@ -552,7 +557,11 @@ async def check_image(
try:
image = await self.sys_docker.images.inspect(image_name)
except aiodocker.DockerError as err:
raise DockerError(
if err.status == HTTPStatus.NOT_FOUND:
raise DockerNotFound(
f"Image {image_name} not found", _LOGGER.info
) from err
raise DockerAPIError(
f"Could not get {image_name} for check due to: {err!s}",
_LOGGER.error,
) from err
Expand Down
12 changes: 8 additions & 4 deletions supervisor/docker/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,13 +640,17 @@ async def run_command(
try:
await self.images.inspect(f"{image}:{tag}")
except aiodocker.DockerError as err:
if err.status == HTTPStatus.NOT_FOUND:
_LOGGER.info("Pulling image %s:%s", image, tag)
await self.images.pull(image, tag=tag)
else:
if err.status != HTTPStatus.NOT_FOUND:
raise DockerError(
f"Can't inspect image {image}:{tag}: {err}", _LOGGER.error
) from err
_LOGGER.info("Pulling image %s:%s", image, tag)
try:
await self.images.pull(image, tag=tag)
except aiodocker.DockerError as pull_err:
raise DockerError(
f"Can't pull image {image}:{tag}: {pull_err}", _LOGGER.error
) from pull_err

try:
container = await self._run(
Expand Down
23 changes: 22 additions & 1 deletion supervisor/resolution/fixups/app_execute_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
import logging

from ...coresys import CoreSys
from ...exceptions import (
DockerBuildError,
DockerNoSpaceOnDevice,
DockerRegistryAuthError,
DockerRegistryRateLimitExceeded,
ResolutionFixupError,
)
from ..const import ContextType, IssueType, SuggestionType
from .base import FixupBase

Expand Down Expand Up @@ -44,7 +51,21 @@ async def process_fixup(self, reference: str | None = None) -> None:

_LOGGER.info("Installing image for app %s", reference)
self.attempts += 1
await app.instance.install(app.version)
try:
await app.instance.install(app.version)
except (
DockerBuildError,
DockerNoSpaceOnDevice,
DockerRegistryAuthError,
DockerRegistryRateLimitExceeded,
) as err:
# These failures won't be resolved by an immediate retry (broken
# Dockerfile or unavailable base/builder image; disk full; bad
# credentials; registry rate limit). Surface as a fixup error so
# FixupBase swallows it without a Sentry event. The repair stays
# available for manual retry once the underlying cause is fixed.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something I realized looking at this - there's only one instance of this class created total. Its created at Supervisor startup and we use the same instance during the entire time Supervisor is running. So self.attempts is never reset, once you get 5 failures then this is just a manual fixup until Supervisor is restarted.

If we're trying to improve this fixup, maybe we want to reset attempts on success? Or make give each addon its own attempts count using a dictionary? Existing issue so doesn't have to be tackled here, just noting it.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah since this is a rather corner case issue I'd rather prefer to not add more complexity.

_LOGGER.warning("Cannot repair app %s: %s", reference, err)
raise ResolutionFixupError() from err

@property
def suggestion(self) -> SuggestionType:
Expand Down
100 changes: 49 additions & 51 deletions tests/apps/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from supervisor.apps.app import App
from supervisor.apps.const import AppBackupMode
from supervisor.apps.model import AppModel
from supervisor.config import CoreConfig
from supervisor.const import ATTR_ADVANCED, AppBoot, AppState, BusEvent
from supervisor.coresys import CoreSys
from supervisor.docker.app import DockerApp
Expand Down Expand Up @@ -1076,75 +1075,70 @@ async def test_app_loads_wrong_image(


@pytest.mark.usefixtures("mock_amd64_arch_supported")
async def test_app_loads_missing_image(coresys: CoreSys, install_app_ssh: App):
"""Test app corrects a missing image on load."""
async def test_app_loads_missing_image_build(coresys: CoreSys, install_app_ssh: App):
"""Test build-required app surfaces a repair when image is missing on load."""
coresys.docker.images.inspect.side_effect = aiodocker.DockerError(
HTTPStatus.NOT_FOUND, {"message": "missing"}
)

with (
patch("pathlib.Path.is_file", return_value=True),
patch.object(
coresys.docker,
"run_command",
return_value=CommandReturn(0, ["Build successful"]),
) as mock_run_command,
patch.object(
type(coresys.config),
"local_to_extern_path",
return_value=PurePath("/addon/path/on/host"),
),
):
with patch.object(
coresys.docker,
"run_command",
return_value=CommandReturn(0, ["Build successful"]),
) as mock_run_command:
await install_app_ssh.load()

mock_run_command.assert_called_once()
assert mock_run_command.call_args.args[0] == "docker"
assert mock_run_command.call_args.kwargs["tag"] == "1.0.0-cli"
command = mock_run_command.call_args.kwargs["command"]
assert is_in_list(
["--platform", "linux/amd64"],
command,
# Build-required apps must not run a build during load. A repair is
# raised so the resolution autofix loop handles it off the critical path.
mock_run_command.assert_not_called()
issue = Issue(
IssueType.MISSING_IMAGE, ContextType.ADDON, reference=install_app_ssh.slug
)
assert is_in_list(
["--tag", "local/amd64-addon-ssh:9.2.1"],
command,
assert issue in coresys.resolution.issues
suggestions = coresys.resolution.suggestions_for_issue(issue)
assert any(s.type == SuggestionType.EXECUTE_REPAIR for s in suggestions)


@pytest.mark.usefixtures("mock_amd64_arch_supported")
async def test_app_loads_missing_image_pull(coresys: CoreSys, install_app_ssh: App):
"""Test pullable app installs the missing image during load."""
install_app_ssh.data["image"] = "test/amd64-addon-ssh"
coresys.docker.images.inspect.side_effect = aiodocker.DockerError(
HTTPStatus.NOT_FOUND, {"message": "missing"}
)
assert install_app_ssh.image == "local/amd64-addon-ssh"

with patch.object(DockerAPI, "pull_image") as mock_pull_image:
await install_app_ssh.load()

mock_pull_image.assert_called_once()
issue = Issue(
IssueType.MISSING_IMAGE, ContextType.ADDON, reference=install_app_ssh.slug
)
assert issue not in coresys.resolution.issues


@pytest.mark.usefixtures("container", "mock_amd64_arch_supported")
async def test_app_load_succeeds_with_docker_errors(
coresys: CoreSys, install_app_ssh: App, caplog: pytest.LogCaptureFixture
):
"""Docker errors while building/pulling an image during load should not raise and fail setup."""
# Build env invalid failure
"""Docker errors during load should not raise and fail setup."""
issue = Issue(
IssueType.MISSING_IMAGE, ContextType.ADDON, reference=install_app_ssh.slug
)

# Build-required app with missing image: repair issue raised, no exception
coresys.docker.images.inspect.side_effect = aiodocker.DockerError(
HTTPStatus.NOT_FOUND, {"message": "missing"}
)
caplog.clear()
await install_app_ssh.load()
assert "Cannot build app 'local_ssh' because dockerfile is missing" in caplog.text

# Image build failure
caplog.clear()
with (
patch("pathlib.Path.is_file", return_value=True),
patch.object(
CoreConfig,
"local_to_extern_path",
return_value=PurePath("/addon/path/on/host"),
),
patch.object(
DockerAPI, "run_command", return_value=CommandReturn(1, ["error"])
),
):
await install_app_ssh.load()
assert (
"Docker build failed for local/amd64-addon-ssh:9.2.1 (exit code 1). Build output:\nerror"
in caplog.text
)
assert issue in coresys.resolution.issues

# Image pull failure
# Pull-based app where check_image's internal install fails: addon left
# detached, no exception escapes to abort setup. The next load will hit
# DockerNotFound and trigger the proper repair path.
stored = coresys.resolution.get_issue_if_present(issue)
coresys.resolution.dismiss_issue(stored)
install_app_ssh.data["image"] = "test/amd64-addon-ssh"
caplog.clear()
with patch.object(
Expand All @@ -1153,7 +1147,11 @@ async def test_app_load_succeeds_with_docker_errors(
side_effect=aiodocker.DockerError(400, {"message": "error"}),
):
await install_app_ssh.load()
assert "Can't install test/amd64-addon-ssh:9.2.1:" in caplog.text
assert "Docker error loading app local_ssh, leaving detached" in caplog.text
assert any(
"Docker error loading app local_ssh" in r.message and r.levelname == "CRITICAL"
for r in caplog.records
)


@pytest.mark.usefixtures("coresys")
Expand Down