Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
15 changes: 9 additions & 6 deletions buildrunner/docker/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,12 @@ def stop(self):
self.log.write(
f"Destroying Docker daemon container {self._daemon_container:.10}\n"
)
if self._daemon_container:
self.docker_client.remove_container(
self._daemon_container,
force=True,
v=True,
)
try:
if self._daemon_container:
self.docker_client.remove_container(
self._daemon_container,
force=True,
v=True,
)
except Exception as _ex:
self.log.write(f"Failed to remove Docker daemon container: {_ex}\n")
70 changes: 39 additions & 31 deletions buildrunner/docker/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,11 +327,16 @@ def cleanup(self):
)
if container_ids:
for container_id in container_ids:
self.docker_client.remove_container(
container_id["Id"],
force=True,
v=True,
)
try:
self.docker_client.remove_container(
container_id["Id"],
force=True,
v=True,
)
except Exception as _ex:
print(
f'Unable to delete docker container with id "{container_id["Id"]}"'
)
else:
print(
f'Unable to find docker container with name or label "{container}"'
Expand All @@ -340,11 +345,16 @@ def cleanup(self):
print(
f'Unable to find docker container with name or label "{container}"'
)
self.docker_client.remove_container(
self.container["Id"],
force=True,
v=True,
)
try:
self.docker_client.remove_container(
self.container["Id"],
force=True,
v=True,
)
except docker.errors.NotFound:
print(
f'Unable to delete docker container with id "{self.container["Id"]}"'
)

self.container = None

Expand All @@ -353,16 +363,16 @@ def _get_cache_file_from_prefix(
logger: ContainerLogger, local_cache_archive_file: str, docker_path: str
) -> Optional[str]:
if os.path.exists(local_cache_archive_file):
logger.write(
f"Using cache {local_cache_archive_file} for destination path {docker_path}\n"
logger.info(
f"Using cache {local_cache_archive_file} for destination path {docker_path}"
)
return local_cache_archive_file
cache_dir = os.path.dirname(local_cache_archive_file)

if not os.path.exists(cache_dir):
logger.write(
logger.info(
f"Cache directory {cache_dir} does not exist, "
f"skipping restore of archive {local_cache_archive_file}\n"
f"skipping restore of archive {local_cache_archive_file}"
)
return None

Expand All @@ -381,14 +391,14 @@ def _get_cache_file_from_prefix(
local_cache_archive_match = curr_archive_file

if local_cache_archive_match is None:
logger.write(
logger.info(
f"Not able to restore cache {docker_path} since "
f"there was no matching prefix for `{local_cache_archive_file}`\n"
f"there was no matching prefix for `{local_cache_archive_file}`"
)
return None
logger.write(
logger.info(
f"Found cache {local_cache_archive_match} matching prefix {local_cache_archive_file} "
f"for destination path {docker_path}\n"
f"for destination path {docker_path}"
)

return local_cache_archive_match
Expand Down Expand Up @@ -422,9 +432,9 @@ def restore_caches(self, logger: ContainerLogger, caches: OrderedDict) -> None:
restored_cache_src = set()
for local_cache_archive_file, docker_path in caches.items():
if docker_path in restored_cache_src:
logger.write(
logger.info(
f"Cache for destination path {docker_path} has already been matched and restored to the container, "
f"skipping {local_cache_archive_file}\n"
f"skipping {local_cache_archive_file}"
)
continue

Expand Down Expand Up @@ -507,13 +517,13 @@ def write_cache_history_log(
file_obj = acquire_flock_open_write_binary(
lock_file=cache_history_log, logger=logger, mode="a"
)
logger.write(
logger.info(
f"File lock acquired. Attempting to write cache history log to {cache_history_log}"
)
file_obj.write(log_str)
finally:
release_flock(file_obj, logger)
logger.write("Writing to cache history log completed. Released file lock.")
logger.info("Writing to cache history log completed, released file lock")

def save_caches(
self, logger: ContainerLogger, caches: OrderedDict, env_vars: dict = dict()
Expand All @@ -526,10 +536,10 @@ def save_caches(
for local_cache_archive_file, docker_path in caches.items():
if docker_path not in saved_cache_src:
saved_cache_src.add(docker_path)
logger.write(
f"Saving cache `{docker_path}` "
logger.info(
f"Saving cache {docker_path} "
f"running on container {self.container['Id']} "
f"to local cache `{local_cache_archive_file}`\n"
f"to local cache {local_cache_archive_file}"
)

log_line = (
Expand All @@ -551,9 +561,7 @@ def save_caches(
file_obj = acquire_flock_open_write_binary(
lock_file=local_cache_archive_file, logger=logger
)
logger.write(
"File lock acquired. Attempting to write to cache."
)
logger.info("File lock acquired. Attempting to write to cache.")
self._write_cache(docker_path, file_obj)
except Exception as e:
raise BuildRunnerSavingCache(
Expand All @@ -564,11 +572,11 @@ def save_caches(
assert tarfile.is_tarfile(
local_cache_archive_file
), f"Failed to create cache {local_cache_archive_file} tar file."
logger.write("Writing to cache completed. Released file lock.")
logger.info("Writing to cache completed. Released file lock.")
else:
logger.write(
logger.info(
f"The following `{docker_path}` in docker has already been saved. "
f"It will not be saved again to `{local_cache_archive_file}`\n"
f"It will not be saved again to `{local_cache_archive_file}`"
)

def run(self, cmd, console=None, stream=True, log=None, workdir=None):
Expand Down
16 changes: 8 additions & 8 deletions buildrunner/sshagent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,16 +245,13 @@ def stop(self):
# kill ssh connection thread
self.log.write("Closing ssh-agent container connection\n")
if self._ssh_client:
# pylint: disable=W0212
if self._ssh_client._agent:
try:
self._ssh_client._agent.close()
# pylint: disable=W0703
except Exception as _ex:
self.log.write(f"Error stopping ssh-agent: {_ex}\n")
try:
self._ssh_client.close()
# pylint: disable=W0703
except Exception as _ex:
self.log.write(f"Error stopping ssh-agent connection: {_ex}\n")

Expand All @@ -263,11 +260,14 @@ def stop(self):
self.log.write(
f"Destroying ssh-agent container {self._ssh_agent_container:.10}\n"
)
self.docker_client.remove_container(
self._ssh_agent_container,
force=True,
v=True,
)
try:
self.docker_client.remove_container(
self._ssh_agent_container,
force=True,
v=True,
)
except Exception as _ex:
self.log.write(f"Error destroying ssh-agent container: {_ex}\n")

def get_ssh_agent_image(self):
"""
Expand Down
17 changes: 13 additions & 4 deletions buildrunner/steprunner/tasks/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ def __del__(self):
self.step_runner.log.info(
f"Deleting network {self.step_runner.network_name}"
)
self._docker_client.remove_network(self.step_runner.network_name)
try:
self._docker_client.remove_network(self.step_runner.network_name)
except Exception as _ex:
self.step_runner.log.error(f"Error removing network: {_ex}")

def _get_source_container(self):
"""
Expand Down Expand Up @@ -1119,9 +1122,15 @@ def run(self, context: dict): # pylint: disable=too-many-statements,too-many-br
"Skipping cache save due to failed exit code"
)
else:
self.runner.save_caches(
container_meta_logger, caches, container_args.get("environment")
)
try:
self.runner.save_caches(
container_meta_logger, caches, container_args.get("environment")
)
except Exception as _ex:
container_meta_logger.error(
f"Error saving caches, ignoring: {_ex}",
)
# Failing to save caches should not fail the build; log the error and continue as normal.

finally:
if self.runner:
Expand Down
6 changes: 2 additions & 4 deletions buildrunner/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,7 @@ def get_lock(file_obj: io.IOBase):
f"PID:{pid} failed to acquire file lock for {lock_file} after timeout of {timeout_seconds} seconds"
)

logger.info(
f"PID:{pid} file opened and lock acquired for {lock_file} ({lock_file_obj})"
)
logger.info(f"PID:{pid} file opened and lock acquired for {lock_file}")

return lock_file_obj

Expand Down Expand Up @@ -308,4 +306,4 @@ def release_flock(
return
portalocker.unlock(lock_file_obj)
lock_file_obj.close()
logger.write(f"PID:{os.getpid()} released and closed file {lock_file_obj}")
logger.info(f"PID:{os.getpid()} released and closed file {lock_file_obj.name}")
Loading