Skip to content

Commit 1d36b3f

Browse files
Sbachmei/mic 5861/fix templated step (#150)
1 parent 1244c48 commit 1d36b3f

10 files changed

Lines changed: 413 additions & 60 deletions

File tree

CHANGELOG.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
**0.1.4 - 2/18/25**
1+
**0.1.5 - 2/20/25**
2+
3+
- Fix handling of templated steps when no looping or parallelism is requested
4+
5+
**0.1.4 - 2/20/25**
26

37
- Implement duplicate_template_step method on TemplatedStep class
48

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.. automodule:: easylink.pipeline_schema_constants.testing

docs/source/api_reference/pipeline_schema_constants/tests.rst

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/easylink/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class Config(LayeredConfigTree):
9797
def __init__(
9898
self,
9999
config_params: dict[str, Any],
100-
potential_schemas: list[PipelineSchema] | PipelineSchema = PIPELINE_SCHEMAS,
100+
potential_schemas: PipelineSchema | list[PipelineSchema] = PIPELINE_SCHEMAS,
101101
) -> None:
102102
super().__init__(layers=["initial_data", "default", "user_configured"])
103103
self.update(DEFAULT_ENVIRONMENT, layer="default")
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
from easylink.pipeline_schema_constants import development, tests
1+
from easylink.pipeline_schema_constants import development, testing
22

33
ALLOWED_SCHEMA_PARAMS = {
44
"development": development.SCHEMA_PARAMS,
55
}
66

77
TESTING_SCHEMA_PARAMS = {
8-
"integration": tests.SINGLE_STEP_SCHEMA_PARAMS,
9-
"combined_bad_topology": tests.BAD_COMBINED_TOPOLOGY_SCHEMA_PARAMS,
10-
"combined_bad_implementation_names": tests.BAD_COMBINED_TOPOLOGY_SCHEMA_PARAMS,
8+
"integration": testing.SINGLE_STEP_SCHEMA_PARAMS,
9+
"combined_bad_topology": testing.BAD_COMBINED_TOPOLOGY_SCHEMA_PARAMS,
10+
"combined_bad_implementation_names": testing.BAD_COMBINED_TOPOLOGY_SCHEMA_PARAMS,
11+
"nested_templated_steps": testing.NESTED_TEMPLATED_STEPS_SCHEMA_PARAMS,
1112
}

src/easylink/pipeline_schema_constants/tests.py renamed to src/easylink/pipeline_schema_constants/testing.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,14 @@
1111
OutputSlot,
1212
OutputSlotMapping,
1313
)
14-
from easylink.step import HierarchicalStep, InputStep, LoopStep, OutputStep, Step
14+
from easylink.step import (
15+
HierarchicalStep,
16+
InputStep,
17+
LoopStep,
18+
OutputStep,
19+
ParallelStep,
20+
Step,
21+
)
1522
from easylink.utilities.validation_utils import validate_input_file_dummy
1623

1724
SINGLE_STEP_NODES = [
@@ -128,3 +135,85 @@
128135
]
129136

130137
BAD_COMBINED_TOPOLOGY_SCHEMA_PARAMS = (BAD_COMBINED_TOPOLOGY_NODES, SINGLE_STEP_EDGES)
138+
139+
140+
NESTED_TEMPLATED_STEPS_NODES = [
141+
InputStep(),
142+
LoopStep(
143+
template_step=ParallelStep(
144+
template_step=HierarchicalStep(
145+
step_name="step_1",
146+
input_slots=[
147+
InputSlot(
148+
name="step_1_main_input",
149+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
150+
validator=validate_input_file_dummy,
151+
),
152+
],
153+
output_slots=[OutputSlot("step_1_main_output")],
154+
nodes=[
155+
Step(
156+
step_name="step_1a",
157+
input_slots=[
158+
InputSlot(
159+
name="step_1a_main_input",
160+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
161+
validator=validate_input_file_dummy,
162+
),
163+
],
164+
output_slots=[OutputSlot("step_1a_main_output")],
165+
),
166+
Step(
167+
step_name="step_1b",
168+
input_slots=[
169+
InputSlot(
170+
name="step_1b_main_input",
171+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
172+
validator=validate_input_file_dummy,
173+
),
174+
],
175+
output_slots=[OutputSlot("step_1b_main_output")],
176+
),
177+
],
178+
edges=[
179+
EdgeParams(
180+
source_node="step_1a",
181+
target_node="step_1b",
182+
output_slot="step_1a_main_output",
183+
input_slot="step_1b_main_input",
184+
),
185+
],
186+
input_slot_mappings=[
187+
InputSlotMapping(
188+
parent_slot="step_1_main_input",
189+
child_node="step_1a",
190+
child_slot="step_1a_main_input",
191+
),
192+
],
193+
output_slot_mappings=[
194+
OutputSlotMapping(
195+
parent_slot="step_1_main_output",
196+
child_node="step_1b",
197+
child_slot="step_1b_main_output",
198+
),
199+
],
200+
),
201+
),
202+
self_edges=[
203+
EdgeParams(
204+
source_node="step_1",
205+
target_node="step_1",
206+
output_slot="step_1_main_output",
207+
input_slot="step_1_main_input",
208+
),
209+
],
210+
),
211+
OutputStep(
212+
input_slots=[
213+
InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
214+
],
215+
),
216+
]
217+
218+
219+
NESTED_TEMPLATED_STEPS_SCHEMA_PARAMS = (NESTED_TEMPLATED_STEPS_NODES, SINGLE_STEP_EDGES)

src/easylink/step.py

Lines changed: 64 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,17 @@ def set_configuration_state(
282282
) -> None:
283283
"""Sets the configuration state for this ``Step``.
284284
285+
The so-called 'configuration state' for a given ``Step`` is backed up by
286+
a :class:`ConfigurationState` class and is assigned to its :attr:`_configuration_state`
287+
attribute. There are two possible ``ConfigurationStates``:
288+
:class:`LeafConfigurationState` and :class:`NonLeafConfigurationState`.
289+
290+
This method sets the configuration state of this ``Step`` based on whether
291+
or not a :attr:`config_key` is set *and exists in the ``Step's`` configuration*
292+
(i.e. its portion of the user-supplied pipeline specification
293+
file); any required deviation from this behavior requires special
294+
handling.
295+
285296
Parameters
286297
----------
287298
parent_config
@@ -378,8 +389,9 @@ def _validate_nonleaf(
378389
) -> dict[str, list[str]]:
379390
"""Validates a non-leaf ``Step``."""
380391
errors = {}
381-
for node in self.step_graph.nodes:
382-
step = self.step_graph.nodes[node]["step"]
392+
nodes = self.step_graph.nodes
393+
for node in nodes:
394+
step = nodes[node]["step"]
383395
if isinstance(step, IOStep):
384396
continue
385397
if step.name not in step_config:
@@ -390,7 +402,7 @@ def _validate_nonleaf(
390402
)
391403
if step_errors:
392404
errors.update(step_errors)
393-
extra_steps = set(step_config.keys()) - set(self.step_graph.nodes)
405+
extra_steps = set(step_config.keys()) - set(nodes)
394406
for extra_step in extra_steps:
395407
errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
396408
return errors
@@ -807,12 +819,43 @@ def set_configuration_state(
807819
The configuration for any implementations to be combined.
808820
input_data_config
809821
The input data configuration for the entire pipeline.
822+
823+
Notes
824+
-----
825+
A ``TemplatedStep`` is always assigned a :class:`NonLeafConfigurationState`
826+
even if it has no multiplicity since (despite having no copies to make) we
827+
still need to traverse the sub-``Steps`` to get to the one with a single
828+
:class:`~easylink.implementation.Implementation`, i.e. the one with a
829+
:class:`LeafConfigurationState`.
810830
"""
811-
num_repeats = len(self._get_config(parent_config[self.name]))
812-
self.step_graph = self._update_step_graph(num_repeats)
813-
self.slot_mappings = self._update_slot_mappings(num_repeats)
814-
super().set_configuration_state(
815-
parent_config, combined_implementations, input_data_config
831+
step_config = parent_config[self.name]
832+
if self.config_key not in step_config:
833+
# Special handle the step_graph update
834+
self.step_graph = StepGraph()
835+
self.template_step.name = self.name
836+
self.step_graph.add_node_from_step(self.template_step)
837+
# Special handle the slot_mappings update
838+
input_mappings = [
839+
InputSlotMapping(slot, self.name, slot) for slot in self.input_slots
840+
]
841+
output_mappings = [
842+
OutputSlotMapping(slot, self.name, slot) for slot in self.output_slots
843+
]
844+
self.slot_mappings = {"input": input_mappings, "output": output_mappings}
845+
# Add the key back to the expanded config
846+
expanded_config = LayeredConfigTree({self.name: step_config})
847+
else:
848+
expanded_config = self._get_config(step_config)
849+
num_repeats = len(expanded_config)
850+
self.step_graph = self._update_step_graph(num_repeats)
851+
self.slot_mappings = self._update_slot_mappings(num_repeats)
852+
# Manually set the configuration state to non-leaf instead of relying
853+
# on super().set_configuration_state() because that method will erroneously
854+
# set to leaf state when we have no multiplicity (because in that case the
855+
# user didn't actually include the config_key in the pipeline specification
856+
# file, hence num_repeats == 1)
857+
self._configuration_state = NonLeafConfigurationState(
858+
self, expanded_config, combined_implementations, input_data_config
816859
)
817860

818861
def _duplicate_template_step(self) -> Step:
@@ -1105,9 +1148,10 @@ def validate_step(
11051148
initial ones are handled.
11061149
11071150
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
1108-
here as opposed to in :meth:`set_configuration_state` (as is done in :class:`TemplatedStep`)
1109-
because ``ChoiceStep`` validation happens prior to setting the configuration
1110-
state and actually requires the ``StepGraph`` and ``SlotMappings``.
1151+
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
1152+
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
1153+
prior to :meth:`set_configuration_state`, but the validation itself actually
1154+
requires the updated ``StepGraph`` and ``SlotMappings``.
11111155
11121156
We do not attempt to validate the subgraph here if the 'type' key is unable
11131157
to be validated.
@@ -1136,7 +1180,7 @@ def validate_step(
11361180
]
11371181
}
11381182

1139-
# Handle the actual chosen step_config
1183+
# HACK: Update the step graph and mappings here because we need them for validation
11401184
self.step_graph = self._update_step_graph(subgraph)
11411185
self.slot_mappings = self._update_slot_mappings(subgraph)
11421186
# NOTE: A ChoiceStep is by definition non-leaf step
@@ -1163,11 +1207,11 @@ def set_configuration_state(
11631207
11641208
Notes
11651209
-----
1166-
We update the :class:`~easylink.graph_components.StepGraph` and
1167-
:class:`SlotMappings<easylink.graph_components.SlotMapping>` in
1168-
:meth:`validate_step` as opposed to here (as is done with
1169-
:class:`TemplatedSteps<TemplatedStep>`) because ``ChoiceStep`` validation
1170-
happens prior to this but requires the ``StepGraph`` and ``SlotMappings``.
1210+
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
1211+
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
1212+
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
1213+
prior to :meth:`set_configuration_state`, but the validation itself actually
1214+
requires the updated ``StepGraph`` and ``SlotMappings``.
11711215
"""
11721216

11731217
chosen_parent_config = LayeredConfigTree(
@@ -1364,7 +1408,6 @@ def get_implementation_edges(self, edge: EdgeParams) -> list[EdgeParams]:
13641408
for mapping in mappings:
13651409
imp_edge = mapping.remap_edge(edge)
13661410
implementation_edges.append(imp_edge)
1367-
13681411
elif edge.target_node == self._step.name:
13691412
mappings = [
13701413
mapping
@@ -1520,7 +1563,6 @@ def get_implementation_edges(self, edge: EdgeParams) -> list[EdgeParams]:
15201563
new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
15211564
imp_edges = new_step.get_implementation_edges(new_edge)
15221565
implementation_edges.extend(imp_edges)
1523-
15241566
elif edge.target_node == self._step.name:
15251567
mappings = [
15261568
mapping
@@ -1544,8 +1586,9 @@ def _configure_subgraph_steps(self) -> None:
15441586
This method recursively traverses the ``StepGraph`` and sets the configuration
15451587
state for each ``Step`` until reaching all leaf nodes.
15461588
"""
1547-
for node in self._step.step_graph.nodes:
1548-
step = self._step.step_graph.nodes[node]["step"]
1589+
nodes = self._step.step_graph.nodes
1590+
for node in nodes:
1591+
step = nodes[node]["step"]
15491592
step.set_configuration_state(
15501593
self.pipeline_config, self.combined_implementations, self.input_data_config
15511594
)

tests/unit/conftest.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,42 @@
414414
},
415415
},
416416
},
417+
"nested_templated_steps": {
418+
"step_1": {
419+
"iterate": [
420+
{ # loop 1: parallel with multiplicity
421+
"parallel": [
422+
{
423+
"implementation": {"name": "step_1_python_pandas"},
424+
"input_data_file": "file1",
425+
},
426+
{
427+
"implementation": {"name": "step_1_python_pandas"},
428+
"input_data_file": "file2",
429+
},
430+
],
431+
},
432+
{ # loop 2: parallel with no multiplicity
433+
"parallel": [
434+
{
435+
"input_data_file": "file1",
436+
"implementation": {"name": "step_1_python_pandas"},
437+
},
438+
],
439+
},
440+
{ # loop 3: missing 'parallel' key, uses hierarchical step
441+
"substeps": {
442+
"step_1a": {
443+
"implementation": {"name": "step_1a_python_pandas"},
444+
},
445+
"step_1b": {
446+
"implementation": {"name": "step_1b_python_pandas"},
447+
},
448+
},
449+
},
450+
],
451+
},
452+
},
417453
}
418454

419455
INPUT_DATA_FORMAT_DICT = {

0 commit comments

Comments
 (0)