Skip to content

Commit 0af0c98

Browse files
author
Nissan Pow
committed
fix: improve cleanup reliability and test coverage for metaflow resume
1 parent ddcaccd commit 0af0c98

2 files changed

Lines changed: 46 additions & 13 deletions

File tree

metaflow/cmd/resume/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,15 @@ def resume(ctx, run_pathspec, step_name):
8181

8282
# Extract code to a temporary directory
8383
_echo("Extracting code package...")
84-
tmp_dir = code.extract()
85-
tmp_path = tmp_dir.name
84+
try:
85+
tmp_dir = code.extract()
86+
except Exception as e:
87+
_echo_error(
88+
"Failed to download code package for *%s/%s*: %s" % (flow_name, run_id, e)
89+
)
90+
raise SystemExit(1)
8691

92+
tmp_path = tmp_dir.name
8793
try:
8894
script_path = os.path.join(tmp_path, script_name)
8995
if not os.path.isfile(script_path):
@@ -105,6 +111,11 @@ def resume(ctx, run_pathspec, step_name):
105111
_echo("")
106112

107113
result = subprocess.run(cmd, cwd=tmp_path)
114+
if result.returncode != 0:
115+
_echo_error(
116+
"Resume of *%s/%s* failed with exit code %d."
117+
% (flow_name, run_id, result.returncode)
118+
)
108119
raise SystemExit(result.returncode)
109120
finally:
110121
# Clean up the temporary directory

test/cmd/resume/test_metaflow_resume.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import os
2+
import sys
23
import tempfile
34

45
import pytest
5-
from unittest.mock import MagicMock, patch, PropertyMock
6+
from unittest.mock import MagicMock, patch
67

78
from metaflow._vendor.click.testing import CliRunner
89

@@ -43,6 +44,8 @@ def test_invalid_pathspec(self, mock_run_cls, mock_ns):
4344
runner = CliRunner()
4445
result = runner.invoke(cli, ["resume", "InvalidPathspec"])
4546
assert result.exit_code != 0
47+
# Run should never be instantiated for a bad pathspec
48+
mock_run_cls.assert_not_called()
4649

4750
@patch("metaflow.cmd.resume.namespace")
4851
@patch("metaflow.cmd.resume.Run")
@@ -51,6 +54,7 @@ def test_run_not_found(self, mock_run_cls, mock_ns):
5154
runner = CliRunner()
5255
result = runner.invoke(cli, ["resume", "MyFlow/123"])
5356
assert result.exit_code == 1
57+
mock_run_cls.assert_called_once_with("MyFlow/123", _namespace_check=False)
5458

5559
@patch("metaflow.cmd.resume.namespace")
5660
@patch("metaflow.cmd.resume.Run")
@@ -62,6 +66,20 @@ def test_no_code_package(self, mock_run_cls, mock_ns):
6266
result = runner.invoke(cli, ["resume", "MyFlow/123"])
6367
assert result.exit_code == 1
6468

69+
@patch("metaflow.cmd.resume.namespace")
70+
@patch("metaflow.cmd.resume.Run")
71+
def test_code_package_extract_fails(self, mock_run_cls, mock_ns):
72+
"""When code.extract() raises, exit 1 with error message."""
73+
mock_run = MagicMock()
74+
mock_run.code.script_name = "myflow.py"
75+
mock_run.code.extract.side_effect = Exception("Download failed: 404")
76+
mock_run_cls.return_value = mock_run
77+
78+
runner = CliRunner(mix_stderr=False)
79+
result = runner.invoke(cli, ["resume", "MyFlow/999"])
80+
assert result.exit_code == 1
81+
assert "Failed to download code package" in result.stderr
82+
6583
@patch("metaflow.cmd.resume.subprocess.run")
6684
@patch("metaflow.cmd.resume.namespace")
6785
@patch("metaflow.cmd.resume.Run")
@@ -73,13 +91,15 @@ def test_basic_resume(self, mock_run_cls, mock_ns, mock_subprocess):
7391
runner = CliRunner()
7492
result = runner.invoke(cli, ["resume", "MyFlow/123"])
7593

76-
# Verify subprocess was called with the right arguments
7794
mock_subprocess.assert_called_once()
7895
cmd = mock_subprocess.call_args[0][0]
79-
assert "myflow.py" in cmd[1]
80-
assert "resume" in cmd
81-
assert "--origin-run-id" in cmd
82-
assert "123" in cmd
96+
# Verify exact command structure: python <script> resume --origin-run-id <id>
97+
assert cmd[0] == sys.executable
98+
assert cmd[1].endswith("myflow.py")
99+
assert cmd[2] == "resume"
100+
assert cmd[3] == "--origin-run-id"
101+
assert cmd[4] == "123"
102+
assert len(cmd) == 5 # no extra args
83103

84104
@patch("metaflow.cmd.resume.subprocess.run")
85105
@patch("metaflow.cmd.resume.namespace")
@@ -94,7 +114,10 @@ def test_resume_with_step(self, mock_run_cls, mock_ns, mock_subprocess):
94114

95115
mock_subprocess.assert_called_once()
96116
cmd = mock_subprocess.call_args[0][0]
97-
assert "train" in cmd
117+
assert cmd[2] == "resume"
118+
assert cmd[3] == "--origin-run-id"
119+
assert cmd[4] == "123"
120+
assert cmd[5] == "train"
98121

99122
@patch("metaflow.cmd.resume.subprocess.run")
100123
@patch("metaflow.cmd.resume.namespace")
@@ -119,14 +142,15 @@ def test_resume_passthrough_args(self, mock_run_cls, mock_ns, mock_subprocess):
119142
@patch("metaflow.cmd.resume.subprocess.run")
120143
@patch("metaflow.cmd.resume.namespace")
121144
@patch("metaflow.cmd.resume.Run")
122-
def test_resume_propagates_exit_code(self, mock_run_cls, mock_ns, mock_subprocess):
145+
def test_nonzero_exit_prints_error(self, mock_run_cls, mock_ns, mock_subprocess):
123146
mock_run = self._make_mock_run(script_name="myflow.py")
124147
mock_run_cls.return_value = mock_run
125148
mock_subprocess.return_value = MagicMock(returncode=42)
126149

127-
runner = CliRunner()
150+
runner = CliRunner(mix_stderr=False)
128151
result = runner.invoke(cli, ["resume", "MyFlow/123"])
129152
assert result.exit_code == 42
153+
assert "failed with exit code 42" in result.stderr
130154

131155
@patch("metaflow.cmd.resume.subprocess.run")
132156
@patch("metaflow.cmd.resume.namespace")
@@ -136,13 +160,11 @@ def test_resume_cleans_up_temp_dir(self, mock_run_cls, mock_ns, mock_subprocess)
136160
mock_run_cls.return_value = mock_run
137161
mock_subprocess.return_value = MagicMock(returncode=0)
138162

139-
# Capture the tmp_path used
140163
tmp_path = mock_run.code.extract.return_value.name
141164

142165
runner = CliRunner()
143166
result = runner.invoke(cli, ["resume", "MyFlow/123"])
144167

145-
# Temp directory should be cleaned up
146168
assert not os.path.exists(tmp_path)
147169

148170
@patch("metaflow.cmd.resume.namespace")

0 commit comments

Comments
 (0)