Skip to content

Commit 44dfaca

Browse files
committed
Allow FlowDecorators, FlowMutators and StepMutators to add_to_package
This provides an easier way to build extensions that modify what gets packaged into the .tar.gz that is sent to the remote environment for execution.
1 parent 8e29f77 commit 44dfaca

5 files changed

Lines changed: 342 additions & 0 deletions

File tree

metaflow/decorators.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,41 @@ def get_top_level_options(self):
261261
"""
262262
return []
263263

264+
def add_to_package(self):
    """
    Declare extra files or modules this flow decorator needs in the code package.

    `MetaflowPackage` invokes this hook while it assembles the code package
    tarball. The default implementation contributes nothing (an empty list);
    subclasses override it and usually write it as a generator.

    Each yielded item is a tuple in one of two shapes:

    - `(file_path, arcname)`: legacy form, kept only for backwards
      compatibility -- prefer the three-element form. `file_path` is the path
      to the file on the local filesystem and `arcname` is the path relative
      to the packaged code.
    - `(content, arcname, type)`, where `type` is one of
      ContentType.{USER_CONTENT, CODE_CONTENT, MODULE_CONTENT, OTHER_CONTENT}:
        - USER_CONTENT: the file is included relative to the directory
          containing the user's flow file.
            - content: path to the file to include
            - arcname: path relative to the directory containing the user's
              flow file
        - CODE_CONTENT: the file is included relative to the code directory
          in the package, i.e. the directory containing `metaflow`.
            - content: path to the file to include
            - arcname: path relative to the code directory in the package
        - MODULE_CONTENT: the module is added to the code package as a python
          module and is importable as usual (import <module_name>).
            - content: name of the module
              (NOTE(review): confirm whether the name or the imported module
              object is expected here)
            - arcname: None (ignored)
        - OTHER_CONTENT: the file is included alongside any other
          configuration/metadata files for the flow.
            - content: path to the file to include
            - arcname: path relative to the config directory in the package
    """
    return []
298+
264299

265300
# compare this to parameters.add_custom_parameters
266301
def add_decorator_options(cmd):

metaflow/package/__init__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,16 @@ def _add_tuple(path_tuple):
615615
elif file_type == ContentType.OTHER_CONTENT:
616616
self._mfcontent.add_other_file(file_path, file_name)
617617

618+
# flow decorators
619+
for decos in self._flow._flow_decorators.values():
620+
for deco in decos:
621+
for path_tuple in deco.add_to_package():
622+
path_tuple = _check_tuple(path_tuple)
623+
if path_tuple is None:
624+
continue
625+
_add_tuple(path_tuple)
626+
627+
# step decorators
618628
for step in self._flow:
619629
for deco in step.decorators:
620630
for path_tuple in deco.add_to_package():
@@ -630,6 +640,27 @@ def _add_tuple(path_tuple):
630640
continue
631641
_add_tuple(path_tuple)
632642

643+
# flow mutators
644+
for mutator in self._flow._flow_mutators:
645+
for path_tuple in mutator.add_to_package():
646+
path_tuple = _check_tuple(path_tuple)
647+
if path_tuple is None:
648+
continue
649+
_add_tuple(path_tuple)
650+
651+
# step mutators (deduplicated across steps)
652+
seen_step_mutators = set()
653+
for step in self._flow:
654+
for mutator in step.config_decorators:
655+
if id(mutator) in seen_step_mutators:
656+
continue
657+
seen_step_mutators.add(id(mutator))
658+
for path_tuple in mutator.add_to_package():
659+
path_tuple = _check_tuple(path_tuple)
660+
if path_tuple is None:
661+
continue
662+
_add_tuple(path_tuple)
663+
633664
def _user_code_tuples(self):
634665
if R.use_r():
635666
# the R working directory

metaflow/user_decorators/user_flow_decorator.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,38 @@ def mutate(
262262
A representation of this flow
263263
"""
264264
return None
265+
266+
def add_to_package(self):
    """
    Declare extra files or modules this flow mutator needs in the code package.

    `MetaflowPackage` invokes this hook while it assembles the code package
    tarball. The default implementation contributes nothing (an empty list);
    subclasses override it and usually write it as a generator.

    Each yielded item is a tuple in one of two shapes:

    - `(file_path, arcname)`: legacy form, kept only for backwards
      compatibility -- prefer the three-element form. `file_path` is the path
      to the file on the local filesystem and `arcname` is the path relative
      to the packaged code.
    - `(content, arcname, type)`, where `type` is one of
      ContentType.{USER_CONTENT, CODE_CONTENT, MODULE_CONTENT, OTHER_CONTENT}:
        - USER_CONTENT: the file is included relative to the directory
          containing the user's flow file.
            - content: path to the file to include
            - arcname: path relative to the directory containing the user's
              flow file
        - CODE_CONTENT: the file is included relative to the code directory
          in the package, i.e. the directory containing `metaflow`.
            - content: path to the file to include
            - arcname: path relative to the code directory in the package
        - MODULE_CONTENT: the module is added to the code package as a python
          module and is importable as usual (import <module_name>).
            - content: name of the module
              (NOTE(review): confirm whether the name or the imported module
              object is expected here)
            - arcname: None (ignored)
        - OTHER_CONTENT: the file is included alongside any other
          configuration/metadata files for the flow.
            - content: path to the file to include
            - arcname: path relative to the config directory in the package
    """
    return []

metaflow/user_decorators/user_step_decorator.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,3 +747,38 @@ def mutate(
747747
A representation of this step
748748
"""
749749
return None
750+
751+
def add_to_package(self):
    """
    Declare extra files or modules this step mutator needs in the code package.

    `MetaflowPackage` invokes this hook while it assembles the code package
    tarball. The default implementation contributes nothing (an empty list);
    subclasses override it and usually write it as a generator.

    Each yielded item is a tuple in one of two shapes:

    - `(file_path, arcname)`: legacy form, kept only for backwards
      compatibility -- prefer the three-element form. `file_path` is the path
      to the file on the local filesystem and `arcname` is the path relative
      to the packaged code.
    - `(content, arcname, type)`, where `type` is one of
      ContentType.{USER_CONTENT, CODE_CONTENT, MODULE_CONTENT, OTHER_CONTENT}:
        - USER_CONTENT: the file is included relative to the directory
          containing the user's flow file.
            - content: path to the file to include
            - arcname: path relative to the directory containing the user's
              flow file
        - CODE_CONTENT: the file is included relative to the code directory
          in the package, i.e. the directory containing `metaflow`.
            - content: path to the file to include
            - arcname: path relative to the code directory in the package
        - MODULE_CONTENT: the module is added to the code package as a python
          module and is importable as usual (import <module_name>).
            - content: name of the module
              (NOTE(review): confirm whether the name or the imported module
              object is expected here)
            - arcname: None (ignored)
        - OTHER_CONTENT: the file is included alongside any other
          configuration/metadata files for the flow.
            - content: path to the file to include
            - arcname: path relative to the config directory in the package
    """
    return []

test/unit/test_add_to_package.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
"""Tests for _add_addl_files in MetaflowPackage.
2+
3+
Exercises the add_to_package hooks on FlowDecorators, FlowMutators,
4+
StepDecorators, and StepMutators, as well as deduplication and error handling.
5+
"""
6+
7+
import os
8+
import tempfile
9+
from types import ModuleType
10+
from unittest.mock import MagicMock, call
11+
12+
import pytest
13+
14+
from metaflow.package import (
15+
MetaflowPackage,
16+
NonUniqueFileNameToFilePathMappingException,
17+
)
18+
from metaflow.packaging_sys import ContentType
19+
20+
21+
# ---------------------------------------------------------------------------
22+
# Helpers
23+
# ---------------------------------------------------------------------------
24+
25+
26+
def _make_step(decorators=None, config_decorators=None):
27+
step = MagicMock()
28+
step.decorators = decorators or []
29+
step.config_decorators = config_decorators or []
30+
return step
31+
32+
33+
def _make_flow(steps, flow_decorators=None, flow_mutators=None):
34+
flow = MagicMock()
35+
# The flow may be iterated multiple times (step decos + step mutators),
36+
# so return a fresh iterator each time.
37+
flow.__iter__ = lambda self: iter(steps)
38+
flow._flow_decorators = flow_decorators or {}
39+
flow._flow_mutators = flow_mutators or []
40+
return flow
41+
42+
43+
def _make_environment(tuples=None):
44+
env = MagicMock()
45+
env.add_to_package.return_value = tuples or []
46+
return env
47+
48+
49+
def _make_mfcontent():
50+
return MagicMock()
51+
52+
53+
def _make_deco(tuples):
54+
"""Create a mock decorator-like object with an add_to_package method."""
55+
deco = MagicMock()
56+
deco.add_to_package.return_value = tuples
57+
return deco
58+
59+
60+
def _call_add_addl_files(flow, environment, mfcontent):
    """Run `_add_addl_files` on a MetaflowPackage created without `__init__`.

    The instance is allocated with `object.__new__`, given only the `_flow`,
    `_environment` and `_mfcontent` attributes, and then asked to add its
    additional files. Returns `mfcontent` so callers can assert on it.
    """
    package = object.__new__(MetaflowPackage)
    package._flow = flow
    package._environment = environment
    package._mfcontent = mfcontent

    package._add_addl_files()
    return mfcontent
68+
69+
70+
# ---------------------------------------------------------------------------
71+
# Tests
72+
# ---------------------------------------------------------------------------
73+
74+
75+
def test_flow_decorator_add_to_package():
    """A file returned by a flow decorator is passed to add_code_file."""
    with tempfile.NamedTemporaryFile(suffix=".py") as tmp:
        flow_deco = _make_deco(
            [(tmp.name, "flow_deco_file.py", ContentType.CODE_CONTENT)]
        )
        single_step_flow = _make_flow(
            steps=[_make_step()],
            flow_decorators={"my_deco": [flow_deco]},
        )
        content = _call_add_addl_files(
            single_step_flow, _make_environment(), _make_mfcontent()
        )
        # The expected path is the realpath of the temporary file.
        expected_path = os.path.realpath(tmp.name)
        content.add_code_file.assert_called_once_with(
            expected_path, "flow_deco_file.py"
        )
87+
88+
89+
def test_flow_mutator_add_to_package_module():
    """A MODULE_CONTENT tuple from a flow mutator is forwarded to add_module."""
    import json

    module_entry = (json, None, ContentType.MODULE_CONTENT)
    flow_mutator = _make_deco([module_entry])
    flow = _make_flow(steps=[_make_step()], flow_mutators=[flow_mutator])
    content = _call_add_addl_files(flow, _make_environment(), _make_mfcontent())
    content.add_module.assert_called_once_with(json)
97+
98+
99+
def test_step_mutator_deduplicated_across_steps():
    """One StepMutator instance attached to two steps is queried only once."""
    with tempfile.NamedTemporaryFile(suffix=".py") as tmp:
        shared = _make_deco([(tmp.name, "shared.py", ContentType.CODE_CONTENT)])
        # Both steps reference the very same mutator object.
        first_step = _make_step(config_decorators=[shared])
        second_step = _make_step(config_decorators=[shared])
        flow = _make_flow(steps=[first_step, second_step])
        content = _call_add_addl_files(flow, _make_environment(), _make_mfcontent())
        assert shared.add_to_package.call_count == 1
        content.add_code_file.assert_called_once()
109+
110+
111+
def test_step_mutator_distinct_instances():
    """Two separate StepMutator instances on one step are both queried."""
    with tempfile.NamedTemporaryFile(suffix=".py") as first_tmp:
        with tempfile.NamedTemporaryFile(suffix=".py") as second_tmp:
            first = _make_deco(
                [(first_tmp.name, "file1.py", ContentType.CODE_CONTENT)]
            )
            second = _make_deco(
                [(second_tmp.name, "file2.py", ContentType.CODE_CONTENT)]
            )
            flow = _make_flow(
                steps=[_make_step(config_decorators=[first, second])]
            )
            content = _call_add_addl_files(
                flow, _make_environment(), _make_mfcontent()
            )
            assert first.add_to_package.call_count == 1
            assert second.add_to_package.call_count == 1
            assert content.add_code_file.call_count == 2
124+
125+
126+
def test_legacy_two_tuple_defaults_to_code_content():
    """A legacy `(file_path, arcname)` pair is routed to add_code_file."""
    with tempfile.NamedTemporaryFile(suffix=".py") as tmp:
        legacy_entry = (tmp.name, "legacy.py")
        step_deco = _make_deco([legacy_entry])
        flow = _make_flow(steps=[_make_step(decorators=[step_deco])])
        content = _call_add_addl_files(flow, _make_environment(), _make_mfcontent())
        expected_path = os.path.realpath(tmp.name)
        content.add_code_file.assert_called_once_with(expected_path, "legacy.py")
134+
135+
136+
def test_non_unique_filename_raises():
    """Two different source files mapped to one arcname raise an exception."""
    with tempfile.NamedTemporaryFile(suffix=".py") as first_tmp:
        with tempfile.NamedTemporaryFile(suffix=".py") as second_tmp:
            first = _make_deco(
                [(first_tmp.name, "same_name.py", ContentType.CODE_CONTENT)]
            )
            second = _make_deco(
                [(second_tmp.name, "same_name.py", ContentType.CODE_CONTENT)]
            )
            flow = _make_flow(steps=[_make_step(decorators=[first, second])])
            with pytest.raises(NonUniqueFileNameToFilePathMappingException):
                _call_add_addl_files(flow, _make_environment(), _make_mfcontent())
147+
148+
149+
def test_module_content_deduplicated():
    """A module returned by two decorators reaches add_module only once."""
    import json

    first = _make_deco([(json, None, ContentType.MODULE_CONTENT)])
    second = _make_deco([(json, None, ContentType.MODULE_CONTENT)])
    flow = _make_flow(steps=[_make_step(decorators=[first, second])])
    content = _call_add_addl_files(flow, _make_environment(), _make_mfcontent())
    content.add_module.assert_called_once_with(json)
159+
160+
161+
def test_other_content_type():
    """An OTHER_CONTENT file is handed to add_other_file."""
    with tempfile.NamedTemporaryFile(suffix=".yaml") as tmp:
        config_entry = (tmp.name, "config.yaml", ContentType.OTHER_CONTENT)
        step_deco = _make_deco([config_entry])
        flow = _make_flow(steps=[_make_step(decorators=[step_deco])])
        content = _call_add_addl_files(flow, _make_environment(), _make_mfcontent())
        expected_path = os.path.realpath(tmp.name)
        content.add_other_file.assert_called_once_with(expected_path, "config.yaml")
171+
172+
173+
def test_ordering_flow_decorators_before_step_decorators():
    """Flow decorators are asked for their files before step decorators.

    Every fake decorator appends its label to a shared list when its
    add_to_package is called; the final list shows the call order.
    """
    observed = []

    def recording_deco(label, tuples):
        def _record():
            observed.append(label)
            return tuples

        fake = MagicMock()
        # Replace the auto-created mock attribute with the recording function.
        fake.add_to_package = _record
        return fake

    with tempfile.NamedTemporaryFile(suffix=".py") as flow_tmp:
        with tempfile.NamedTemporaryFile(suffix=".py") as step_tmp:
            flow_level = recording_deco(
                "flow_deco",
                [(flow_tmp.name, "flow_file.py", ContentType.CODE_CONTENT)],
            )
            step_level = recording_deco(
                "step_deco",
                [(step_tmp.name, "step_file.py", ContentType.CODE_CONTENT)],
            )
            flow = _make_flow(
                steps=[_make_step(decorators=[step_level])],
                flow_decorators={"fd": [flow_level]},
            )
            _call_add_addl_files(flow, _make_environment(), _make_mfcontent())
            assert observed == ["flow_deco", "step_deco"]

0 commit comments

Comments
 (0)