Skip to content

Commit 9c7a5e3

Browse files
romain-intelclaude
andcommitted
Add USER_CONTENT merge tests + minor review cleanups
Address code-review feedback on the USER_CONTENT merge change: DEF-010 — Unit tests for _user_code_tuples merge path: - Emits an addl USER_CONTENT file not covered by the walker. - Drops an addl file whose arcname was already yielded by the walker (walker's copy wins). - USER_CONTENT bypasses the walker's suffix/user filter, so a file excluded by the filter still gets packaged when contributed via add_to_package. DEF-011 — Integration tests covering the two-phase design: - _add_addl_files records USER_CONTENT, then _user_code_tuples merges with the walker without duplicating an arcname already present in the flow dir. - A decorator can contribute a file that lives outside the flow dir; it flows end-to-end into the user code tuples. Minor: - Remove unused Optional import in package_suffixes_mutator.py. - Document why _add_tuple intentionally skips USER_CONTENT (mfcontent vs. user-code namespaces are separate). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 311e744 commit 9c7a5e3

3 files changed

Lines changed: 187 additions & 1 deletion

File tree

metaflow/package/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,11 @@ def _check_tuple(path_tuple):
639639
return path_tuple
640640

641641
def _add_tuple(path_tuple):
642+
# USER_CONTENT is intentionally NOT handled here: those files are
643+
# packaged alongside the user's flow code (see _user_code_tuples)
644+
# rather than under the mfcontent namespace, and are tracked in
645+
# self._user_content_from_addl by _check_tuple above. mfcontent
646+
# owns MODULE/CODE/OTHER only.
642647
file_path, file_name, file_type = path_tuple
643648
if file_type == ContentType.MODULE_CONTENT:
644649
# file_path is actually a module

metaflow/plugins/package_suffixes_mutator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def start(self):
2424

2525
import inspect
2626
import os
27-
from typing import List, Optional, Union
27+
from typing import List, Union
2828

2929
from metaflow.packaging_sys import ContentType
3030
from metaflow.packaging_sys.utils import walk

test/unit/test_add_to_package.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
"""
66

77
import os
8+
import sys
89
import tempfile
910
from types import ModuleType
11+
from unittest import mock
1012
from unittest.mock import MagicMock, call
1113

1214
import pytest
@@ -262,3 +264,182 @@ def test_user_content_duplicate_different_path_raises():
262264
with pytest.raises(NonUniqueFileNameToFilePathMappingException):
263265
pkg = _build_pkg(flow, _make_environment(), _make_mfcontent())
264266
pkg._add_addl_files()
267+
268+
269+
# ---------------------------------------------------------------------------
270+
# _user_code_tuples — merge of USER_CONTENT with the flow-dir walker (DEF-010)
271+
# ---------------------------------------------------------------------------
272+
273+
274+
def _build_pkg_for_user_tuples(tmpdir, user_content_from_addl=None):
275+
"""Build a minimal MetaflowPackage with just enough state for
276+
_user_code_tuples(): a flow dir, filter, exclude list, and the
277+
dict of USER_CONTENT files produced by add_to_package."""
278+
pkg = object.__new__(MetaflowPackage)
279+
pkg._user_code_filter = lambda _name: True
280+
pkg._exclude_tl_dirs = []
281+
pkg._user_content_from_addl = user_content_from_addl or {}
282+
pkg._user_flow_dir = None
283+
return pkg
284+
285+
286+
def _fake_flow_dir(tmpdir, *relpaths):
287+
"""Create `flow.py` plus the given relative paths in tmpdir. Returns the
288+
absolute flow.py path."""
289+
flow_file = os.path.join(tmpdir, "flow.py")
290+
with open(flow_file, "w") as f:
291+
f.write("# flow\n")
292+
for rel in relpaths:
293+
p = os.path.join(tmpdir, rel)
294+
os.makedirs(os.path.dirname(p), exist_ok=True)
295+
with open(p, "w") as f:
296+
f.write("x\n")
297+
return flow_file
298+
299+
300+
def test_user_code_tuples_emits_addl_user_content_not_in_walker():
301+
"""A USER_CONTENT file outside the walker's output gets emitted."""
302+
with tempfile.TemporaryDirectory() as flow_dir, tempfile.NamedTemporaryFile(
303+
suffix=".cfg", delete=False
304+
) as external:
305+
try:
306+
_fake_flow_dir(flow_dir, "code.py")
307+
pkg = _build_pkg_for_user_tuples(
308+
flow_dir,
309+
user_content_from_addl={"extra.cfg": external.name},
310+
)
311+
with mock.patch.object(
312+
sys, "argv", [os.path.join(flow_dir, "flow.py")]
313+
), mock.patch("metaflow.R.use_r", return_value=False):
314+
tuples = list(pkg._user_code_tuples())
315+
by_arc = {arc: path for path, arc in tuples}
316+
# walker picked up code.py and flow.py from the flow dir
317+
assert "code.py" in by_arc
318+
assert "flow.py" in by_arc
319+
# external USER_CONTENT was emitted as well
320+
assert by_arc["extra.cfg"] == external.name
321+
finally:
322+
os.unlink(external.name)
323+
324+
325+
def test_user_code_tuples_skips_addl_when_walker_already_has_it():
326+
"""USER_CONTENT with same arcname as a walker-yielded file is dropped."""
327+
with tempfile.TemporaryDirectory() as flow_dir:
328+
_fake_flow_dir(flow_dir, "code.py")
329+
walker_path = os.path.join(flow_dir, "code.py")
330+
# A *different* absolute path but same arcname. The walker wins.
331+
with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as shadow:
332+
try:
333+
pkg = _build_pkg_for_user_tuples(
334+
flow_dir,
335+
user_content_from_addl={"code.py": shadow.name},
336+
)
337+
with mock.patch.object(
338+
sys, "argv", [os.path.join(flow_dir, "flow.py")]
339+
), mock.patch("metaflow.R.use_r", return_value=False):
340+
tuples = list(pkg._user_code_tuples())
341+
# code.py appears exactly once, with the walker's path
342+
code_py = [t for t in tuples if t[1] == "code.py"]
343+
assert len(code_py) == 1
344+
assert code_py[0][0] == walker_path
345+
# shadow is never emitted
346+
assert not any(t[0] == shadow.name for t in tuples)
347+
finally:
348+
os.unlink(shadow.name)
349+
350+
351+
def test_user_code_tuples_respects_user_code_filter():
352+
"""USER_CONTENT bypasses the suffix/user filter applied to the walker.
353+
354+
The walker's filter excludes .yaml, but a USER_CONTENT tuple with a .yaml
355+
arcname must still be emitted — this is a primary reason USER_CONTENT
356+
exists.
357+
"""
358+
with tempfile.TemporaryDirectory() as flow_dir:
359+
_fake_flow_dir(flow_dir, "conf.yaml")
360+
yaml_path = os.path.join(flow_dir, "conf.yaml")
361+
pkg = _build_pkg_for_user_tuples(
362+
flow_dir,
363+
user_content_from_addl={"conf.yaml": yaml_path},
364+
)
365+
# Restrict walker to .py only — .yaml should not come from the walker.
366+
pkg._user_code_filter = lambda fname: fname.lower().endswith(".py")
367+
with mock.patch.object(
368+
sys, "argv", [os.path.join(flow_dir, "flow.py")]
369+
), mock.patch("metaflow.R.use_r", return_value=False):
370+
tuples = list(pkg._user_code_tuples())
371+
by_arc = {arc: path for path, arc in tuples}
372+
assert "conf.yaml" in by_arc
373+
assert by_arc["conf.yaml"] == yaml_path
374+
375+
376+
# ---------------------------------------------------------------------------
377+
# Integration: _add_addl_files + _user_code_tuples together (DEF-011)
378+
# ---------------------------------------------------------------------------
379+
380+
381+
def test_integration_add_addl_then_user_code_tuples_dedupes_by_arcname():
382+
"""End-to-end: decorator emits USER_CONTENT for a file already in the
383+
flow dir; the final user tuples contain it only once with the walker's
384+
path.
385+
"""
386+
with tempfile.TemporaryDirectory() as flow_dir:
387+
_fake_flow_dir(flow_dir, "code.py")
388+
walker_path = os.path.join(flow_dir, "code.py")
389+
390+
deco = _make_deco([(walker_path, "code.py", ContentType.USER_CONTENT)])
391+
flow = _make_flow(
392+
steps=[_make_step()],
393+
flow_decorators={"fd": [deco]},
394+
)
395+
pkg = _build_pkg(flow, _make_environment(), _make_mfcontent())
396+
pkg._user_code_filter = lambda _: True
397+
pkg._exclude_tl_dirs = []
398+
pkg._user_flow_dir = None
399+
400+
# Phase 1: populate _user_content_from_addl from add_to_package hooks.
401+
pkg._add_addl_files()
402+
assert pkg._user_content_from_addl == {"code.py": os.path.realpath(walker_path)}
403+
404+
# Phase 2: walk the flow dir and merge addl USER_CONTENT.
405+
with mock.patch.object(
406+
sys, "argv", [os.path.join(flow_dir, "flow.py")]
407+
), mock.patch("metaflow.R.use_r", return_value=False):
408+
tuples = list(pkg._user_code_tuples())
409+
410+
# code.py is present exactly once, walker's copy wins (by arcname dedup).
411+
code_py = [t for t in tuples if t[1] == "code.py"]
412+
assert len(code_py) == 1
413+
414+
415+
def test_integration_add_addl_contributes_file_outside_flow_dir():
416+
"""End-to-end: decorator emits USER_CONTENT for a file that is NOT in the
417+
flow dir; it ends up in the user tuples via the merge path.
418+
"""
419+
with tempfile.TemporaryDirectory() as flow_dir, tempfile.NamedTemporaryFile(
420+
suffix=".cfg", delete=False
421+
) as external:
422+
try:
423+
_fake_flow_dir(flow_dir)
424+
425+
deco = _make_deco(
426+
[(external.name, "external.cfg", ContentType.USER_CONTENT)]
427+
)
428+
flow = _make_flow(
429+
steps=[_make_step()],
430+
flow_decorators={"fd": [deco]},
431+
)
432+
pkg = _build_pkg(flow, _make_environment(), _make_mfcontent())
433+
pkg._user_code_filter = lambda _: True
434+
pkg._exclude_tl_dirs = []
435+
pkg._user_flow_dir = None
436+
437+
pkg._add_addl_files()
438+
with mock.patch.object(
439+
sys, "argv", [os.path.join(flow_dir, "flow.py")]
440+
), mock.patch("metaflow.R.use_r", return_value=False):
441+
tuples = list(pkg._user_code_tuples())
442+
by_arc = {arc: path for path, arc in tuples}
443+
assert by_arc["external.cfg"] == os.path.realpath(external.name)
444+
finally:
445+
os.unlink(external.name)

0 commit comments

Comments
 (0)