Skip to content

Commit 946ab06

Browse files
ravwojdylaclaude
andcommitted
marin: rename .artifact -> artifact.json with legacy fallback
Addresses #5732 (comment). `Artifact.save` now writes `artifact.json`; `from_path` reads it first and falls back to the legacy `.artifact` dotfile so historical GCS outputs remain loadable. The fallback can be deleted once those prefixes are gone. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 7db6a14 commit 946ab06

2 files changed

Lines changed: 33 additions & 19 deletions

File tree

lib/marin/src/marin/execution/artifact.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515

1616

1717
class Artifact:
18-
__artifact_file_name = ".artifact"
18+
__artifact_file_name = "artifact.json"
19+
# Legacy filename written before the rename; read-only fallback so historical
20+
# GCS outputs remain loadable. Safe to remove once those prefixes are gone.
21+
__legacy_artifact_file_name = ".artifact"
1922

2023
@overload
2124
@classmethod
@@ -34,29 +37,32 @@ def from_path(
3437
If ``base_path`` is a relative path (no URL scheme, doesn't start with ``/``),
3538
it is resolved against ``marin_prefix()``.
3639
37-
If ``base_path`` has no ``.artifact`` file but its ``.executor_status`` file
38-
contains ``SUCCESS``, returns a :class:`PathMetadata` pointing at ``base_path``
39-
— provided the caller asked for no specific type or for ``PathMetadata``.
40+
If ``base_path`` has no ``artifact.json`` (or legacy ``.artifact``) file but
41+
its ``.executor_status`` file contains ``SUCCESS``, returns a
42+
:class:`PathMetadata` pointing at ``base_path`` — provided the caller asked
43+
for no specific type or for ``PathMetadata``.
4044
"""
4145

4246
if isinstance(base_path, StepSpec):
4347
base_path = base_path.output_path
4448
elif _is_relative_path(base_path):
4549
base_path = f"{marin_prefix()}/{base_path}"
4650

47-
try:
48-
with open_url(f"{base_path}/{cls.__artifact_file_name}", "rb") as fd:
49-
if artifact_type is None:
50-
return json.load(fd)
51-
if not issubclass(artifact_type, BaseModel):
52-
raise TypeError(f"artifact_type must be a pydantic BaseModel subclass, got {artifact_type!r}")
53-
return artifact_type.model_validate_json(fd.read())
54-
except FileNotFoundError:
55-
return cls._from_executor_status(base_path, artifact_type)
51+
for file_name in (cls.__artifact_file_name, cls.__legacy_artifact_file_name):
52+
try:
53+
with open_url(f"{base_path}/{file_name}", "rb") as fd:
54+
if artifact_type is None:
55+
return json.load(fd)
56+
if not issubclass(artifact_type, BaseModel):
57+
raise TypeError(f"artifact_type must be a pydantic BaseModel subclass, got {artifact_type!r}")
58+
return artifact_type.model_validate_json(fd.read())
59+
except FileNotFoundError:
60+
continue
61+
return cls._from_executor_status(base_path, artifact_type)
5662

5763
@classmethod
5864
def _from_executor_status(cls, base_path: str, artifact_type: type[T] | None) -> "T | PathMetadata":
59-
"""Fallback when ``.artifact`` is absent: synthesize a :class:`PathMetadata`
65+
"""Fallback when no artifact file is present: synthesize a :class:`PathMetadata`
6066
if the step published ``.executor_status = SUCCESS``.
6167
6268
Only valid when the caller wants no type or ``PathMetadata`` — other types

tests/execution/test_step_runner.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def test_artifact_load_relative_path_resolves_against_marin_prefix(tmp_path: Pat
125125

126126

127127
def test_artifact_from_executor_status_success_untyped(tmp_path: Path):
128-
"""No .artifact, but .executor_status=SUCCESS: synthesize PathMetadata."""
128+
"""No artifact file, but .executor_status=SUCCESS: synthesize PathMetadata."""
129129
(tmp_path / ".executor_status").write_text("SUCCESS")
130130

131131
loaded = Artifact.from_path(tmp_path.as_posix())
@@ -134,29 +134,37 @@ def test_artifact_from_executor_status_success_untyped(tmp_path: Path):
134134

135135

136136
def test_artifact_from_executor_status_success_typed_pathmetadata(tmp_path: Path):
137-
"""No .artifact, but .executor_status=SUCCESS and caller asks for PathMetadata."""
137+
"""No artifact file, but .executor_status=SUCCESS and caller asks for PathMetadata."""
138138
(tmp_path / ".executor_status").write_text("SUCCESS")
139139

140140
loaded = Artifact.from_path(tmp_path.as_posix(), PathMetadata)
141141
assert loaded == PathMetadata(path=tmp_path.as_posix())
142142

143143

144144
def test_artifact_from_executor_status_success_typed_other_raises(tmp_path: Path):
145-
"""No .artifact, .executor_status=SUCCESS, but caller asks for a different type."""
145+
"""No artifact file, .executor_status=SUCCESS, but caller asks for a different type."""
146146
(tmp_path / ".executor_status").write_text("SUCCESS")
147147

148148
with pytest.raises(FileNotFoundError, match="cannot synthesize"):
149149
Artifact.from_path(tmp_path.as_posix(), TokenizeMetadata)
150150

151151

152152
def test_artifact_from_executor_status_non_success_raises(tmp_path: Path):
153-
"""No .artifact, .executor_status present but not SUCCESS."""
153+
"""No artifact file, .executor_status present but not SUCCESS."""
154154
(tmp_path / ".executor_status").write_text("RUNNING")
155155

156156
with pytest.raises(FileNotFoundError, match="not 'SUCCESS'"):
157157
Artifact.from_path(tmp_path.as_posix())
158158

159159

160+
def test_artifact_load_legacy_dotfile(tmp_path: Path):
161+
"""Historical outputs wrote `.artifact`; from_path should still load them."""
162+
(tmp_path / ".artifact").write_text(json.dumps({"path": "/legacy"}))
163+
164+
loaded = Artifact.from_path(tmp_path.as_posix(), PathMetadata)
165+
assert loaded == PathMetadata(path="/legacy")
166+
167+
160168
def test_artifact_save_and_load_untyped(tmp_path: Path):
161169
artifact = TokenizeMetadata(path="/tokenized", num_tokens=42)
162170
Artifact.save(artifact, tmp_path.as_posix())
@@ -440,7 +448,7 @@ def test_runner_skips_completed_steps(tmp_path: Path):
440448
runner1.run(steps)
441449

442450
# Record modification times
443-
tokenize_artifact_path = os.path.join(steps[1].output_path, ".artifact")
451+
tokenize_artifact_path = os.path.join(steps[1].output_path, "artifact.json")
444452
mtime_before = os.path.getmtime(tokenize_artifact_path)
445453

446454
# Re-run — all steps should be skipped

0 commit comments

Comments
 (0)