Skip to content

Commit 360d126

Browse files
committed
marin: artifact.from_path falls back to .executor_status SUCCESS marker
When `.artifact` is absent but `.executor_status` reads `SUCCESS`, synthesize a `PathMetadata` pointing at the step's output dir so legacy steps that publish only the status marker are still loadable. The fallback applies for untyped calls and for callers asking specifically for `PathMetadata`; other types still raise. `PathMetadata` is now a pydantic model so it can be returned through the typed code path.
1 parent 9f43e8d commit 360d126

2 files changed

Lines changed: 93 additions & 13 deletions

File tree

lib/marin/src/marin/execution/artifact.py

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pydantic import BaseModel
99
from rigging.filesystem import marin_prefix, open_url
1010

11+
from marin.execution.executor_step_status import STATUS_SUCCESS, get_status_path
1112
from marin.execution.step_spec import StepSpec, _is_relative_path
1213

1314
T = TypeVar("T")
@@ -22,29 +23,60 @@ def from_path(cls, base_path: str | StepSpec, artifact_type: type[T]) -> T: ...
2223

2324
@overload
2425
@classmethod
25-
def from_path(cls, base_path: str | StepSpec) -> dict[str, Any]: ...
26+
def from_path(cls, base_path: str | StepSpec) -> "PathMetadata | dict[str, Any]": ...
2627

2728
@classmethod
28-
def from_path(cls, base_path: str | StepSpec, artifact_type: type[T] | None = None) -> T | dict[str, Any]:
29+
def from_path(
30+
cls, base_path: str | StepSpec, artifact_type: type[T] | None = None
31+
) -> "T | PathMetadata | dict[str, Any]":
2932
"""Load an Artifact instance from the specified output base path.
3033
3134
If ``base_path`` is a relative path (no URL scheme, doesn't start with ``/``),
3235
it is resolved against ``marin_prefix()``.
36+
37+
If ``base_path`` has no ``.artifact`` file but its ``.executor_status`` file
38+
contains ``SUCCESS``, returns a :class:`PathMetadata` pointing at ``base_path``
39+
— provided the caller asked for no specific type or for ``PathMetadata``.
3340
"""
3441

3542
if isinstance(base_path, StepSpec):
3643
base_path = base_path.output_path
3744
elif _is_relative_path(base_path):
3845
base_path = f"{marin_prefix()}/{base_path}"
3946

40-
with open_url(f"{base_path}/{cls.__artifact_file_name}", "rb") as fd:
41-
if artifact_type is None:
42-
return json.load(fd)
43-
if issubclass(artifact_type, BaseModel):
44-
return artifact_type.model_validate_json(fd.read())
45-
if is_dataclass(artifact_type):
46-
return artifact_type(**json.load(fd)) # type: ignore[not-callable]
47-
raise ValueError(f"Unsupported artifact type: {artifact_type!r}")
47+
try:
48+
with open_url(f"{base_path}/{cls.__artifact_file_name}", "rb") as fd:
49+
if artifact_type is None:
50+
return json.load(fd)
51+
if issubclass(artifact_type, BaseModel):
52+
return artifact_type.model_validate_json(fd.read())
53+
if is_dataclass(artifact_type):
54+
return artifact_type(**json.load(fd)) # type: ignore[not-callable]
55+
raise ValueError(f"Unsupported artifact type: {artifact_type!r}")
56+
except FileNotFoundError:
57+
return cls._from_executor_status(base_path, artifact_type)
58+
59+
@classmethod
60+
def _from_executor_status(cls, base_path: str, artifact_type: type[T] | None) -> "T | PathMetadata":
61+
"""Fallback when ``.artifact`` is absent: synthesize a :class:`PathMetadata`
62+
if the step published ``.executor_status = SUCCESS``.
63+
64+
Only valid when the caller wants no type or ``PathMetadata`` — other types
65+
cannot be reconstructed from a bare path.
66+
"""
67+
if artifact_type is not None and artifact_type is not PathMetadata:
68+
raise FileNotFoundError(
69+
f"No {cls.__artifact_file_name} at {base_path}; cannot synthesize "
70+
f"{artifact_type!r} from {get_status_path(base_path)!r}"
71+
)
72+
with open_url(get_status_path(base_path), "r") as fd:
73+
status = fd.read().strip()
74+
if status != STATUS_SUCCESS:
75+
raise FileNotFoundError(
76+
f"No {cls.__artifact_file_name} at {base_path} and "
77+
f"{get_status_path(base_path)!r} is {status!r} (not {STATUS_SUCCESS!r})"
78+
)
79+
return PathMetadata(path=base_path)
4880

4981
@classmethod
5082
def save(cls, artifact: T, base_path: str) -> None:
@@ -61,9 +93,12 @@ def save(cls, artifact: T, base_path: str) -> None:
6193
fd.write(json.dumps(artifact).encode("utf-8"))
6294

6395

64-
@dataclass
65-
class PathMetadata:
66-
"""Represents a single output path"""
96+
class PathMetadata(BaseModel):
97+
"""Represents a single output path.
98+
99+
Also used as the synthetic return type of :meth:`Artifact.from_path` when the
100+
step published a ``.executor_status = SUCCESS`` marker but no ``.artifact``.
101+
"""
67102

68103
path: str
69104

tests/execution/test_step_runner.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,51 @@ def test_artifact_load_relative_path_resolves_against_marin_prefix(tmp_path: Pat
125125
assert loaded == artifact
126126

127127

128+
def test_artifact_from_executor_status_success_untyped(tmp_path: Path):
129+
"""No .artifact, but .executor_status=SUCCESS: synthesize PathMetadata."""
130+
(tmp_path / ".executor_status").write_text("SUCCESS")
131+
132+
loaded = Artifact.from_path(tmp_path.as_posix())
133+
assert isinstance(loaded, PathMetadata)
134+
assert loaded.path == tmp_path.as_posix()
135+
136+
137+
def test_artifact_from_executor_status_success_typed_pathmetadata(tmp_path: Path):
138+
"""No .artifact, but .executor_status=SUCCESS and caller asks for PathMetadata."""
139+
(tmp_path / ".executor_status").write_text("SUCCESS")
140+
141+
loaded = Artifact.from_path(tmp_path.as_posix(), PathMetadata)
142+
assert loaded == PathMetadata(path=tmp_path.as_posix())
143+
144+
145+
def test_artifact_from_executor_status_success_typed_other_raises(tmp_path: Path):
146+
"""No .artifact, .executor_status=SUCCESS, but caller asks for a different type."""
147+
(tmp_path / ".executor_status").write_text("SUCCESS")
148+
149+
with pytest.raises(FileNotFoundError, match="cannot synthesize"):
150+
Artifact.from_path(tmp_path.as_posix(), TokenizeMetadata)
151+
152+
153+
def test_artifact_from_executor_status_non_success_raises(tmp_path: Path):
154+
"""No .artifact, .executor_status present but not SUCCESS."""
155+
(tmp_path / ".executor_status").write_text("RUNNING")
156+
157+
with pytest.raises(FileNotFoundError, match="not 'SUCCESS'"):
158+
Artifact.from_path(tmp_path.as_posix())
159+
160+
161+
def test_artifact_from_executor_status_relative_path(tmp_path: Path, monkeypatch):
162+
"""Fallback also works through MARIN_PREFIX-resolved relative paths."""
163+
monkeypatch.setenv("MARIN_PREFIX", tmp_path.as_posix())
164+
step_dir = tmp_path / "step_out"
165+
step_dir.mkdir()
166+
(step_dir / ".executor_status").write_text("SUCCESS")
167+
168+
loaded = Artifact.from_path("step_out")
169+
assert isinstance(loaded, PathMetadata)
170+
assert loaded.path == step_dir.as_posix()
171+
172+
128173
def test_artifact_save_and_load_untyped(tmp_path: Path):
129174
artifact = TokenizeMetadata(path="/tokenized", num_tokens=42)
130175
Artifact.save(artifact, tmp_path.as_posix())

0 commit comments

Comments
 (0)