Skip to content

Commit 9d44622

Browse files
committed
fix(dask): catch cleanup errors on cluster creation failures
When the creation of Dask Kubernetes resources fails, the cleanup call to delete_dask_cluster() could itself raise RuntimeError (e.g. when CRDs are not installed). The error then propagated beyond ApiException and surfaced as a 500 Internal Server Error instead of providing a meaningful error message. This commit wraps the cleanup call in a try/except block that logs the cleanup failure, so it no longer propagates.
1 parent 9044623 commit 9d44622

2 files changed

Lines changed: 23 additions & 1 deletion

File tree

reana_workflow_controller/dask.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,12 @@ def create_dask_resources(self):
147147
logging.error(
148148
f"An error occured while trying to create dask cluster, now deleting the cluster... Error message:\n{e}"
149149
)
150-
delete_dask_cluster(self.workflow_id, self.user_id)
150+
try:
151+
delete_dask_cluster(self.workflow_id, self.user_id)
152+
except Exception:
153+
logging.exception(
154+
"Failed to clean up Dask resources after creation error."
155+
)
151156

152157
def _prepare_cluster(self):
153158
"""Prepare Dask cluster body by adding necessary image-pull secrets, volumes, volume mounts, init containers and sidecar containers."""

tests/test_dask.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,23 @@ def test_add_hostpath_volumes_with_mounts(
153153
)
154154

155155

156+
def test_create_dask_resources_cleanup_error_is_swallowed(dask_resource_manager):
157+
"""Test that cleanup errors after creation failure do not propagate."""
158+
with patch.object(
159+
dask_resource_manager,
160+
"_prepare_cluster",
161+
), patch.object(
162+
dask_resource_manager,
163+
"_create_dask_cluster",
164+
side_effect=Exception("CRD not found"),
165+
), patch(
166+
"reana_workflow_controller.dask.delete_dask_cluster",
167+
side_effect=RuntimeError("cleanup failed"),
168+
):
169+
# Should not raise despite both creation and cleanup failing
170+
dask_resource_manager.create_dask_resources()
171+
172+
156173
def test_create_dask_resources(dask_resource_manager):
157174
"""Test create_dask_resources method."""
158175
# Patch internal methods that should be called

0 commit comments

Comments
 (0)