Skip to content

Commit e6f59d6

Browse files
authored
Merge pull request PolusAI#207 from jfennick/source_means_var_not_val
Source means var not val
2 parents 17c4b74 + 6784e45 commit e6f59d6

File tree

7 files changed

+46
-77
lines changed

7 files changed

+46
-77
lines changed

src/wic/api/pythonapi.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -201,9 +201,9 @@ def set_input_Step_Workflow(process_self: Any, __name: str, __value: Any) -> Any
201201
# (Very useful for regression testing!)
202202
# NOTE: process_name is either clt name or workflow name
203203
tmp = __value.value if __value.value else f"{__name}{process_self.process_name}"
204-
alias_dict = {'wic_alias': {'key': tmp}}
204+
alias_dict = {'wic_alias': tmp}
205205
local_input._set_value(alias_dict, linked=True)
206-
anchor_dict = {'wic_anchor': {'key': tmp}}
206+
anchor_dict = {'wic_anchor': tmp}
207207
__value._set_value(anchor_dict, linked=True)
208208
except BaseException as exc:
209209
raise exc
@@ -223,7 +223,7 @@ def set_input_Step_Workflow(process_self: Any, __name: str, __value: Any) -> Any
223223
__value._set_value(f"{tmp}", linked=True)
224224
else:
225225
anchor_dict = __value.value
226-
alias_dict = {'wic_alias': {'key': anchor_dict['wic_anchor']['key']}}
226+
alias_dict = {'wic_alias': anchor_dict['wic_anchor']}
227227
local_input._set_value(alias_dict, linked=True)
228228
except BaseException as exc:
229229
raise exc
@@ -237,7 +237,7 @@ def set_input_Step_Workflow(process_self: Any, __name: str, __value: Any) -> Any
237237
f"got {__value.__class__.__name__}, "
238238
f"expected {obj.inp_type.__name__}"
239239
)
240-
ii_dict = {'wic_inline_input': {'key': __value}}
240+
ii_dict = {'wic_inline_input': __value}
241241
process_self.inputs[index]._set_value(ii_dict)
242242

243243

@@ -382,12 +382,12 @@ def _yml(self) -> dict:
382382
if isinstance(inp.value, Path):
383383
# Special case for Path since it does not inherit from YAMLObject
384384
in_dict[inp.name] = str(inp.value)
385-
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_alias', {}).get('key', {}), Path):
385+
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_alias', {}), Path):
386386
# Special case for Path since it does not inherit from YAMLObject
387-
in_dict[inp.name] = {'wic_alias': {'key': str(inp.value['wic_alias']['key'])}}
388-
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_inline_input', {}).get('key', {}), Path):
387+
in_dict[inp.name] = {'wic_alias': str(inp.value['wic_alias'])}
388+
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_inline_input', {}), Path):
389389
# Special case for Path since it does not inherit from YAMLObject
390-
in_dict[inp.name] = {'wic_inline_input': {'key': str(inp.value['wic_inline_input']['key'])}}
390+
in_dict[inp.name] = {'wic_inline_input': str(inp.value['wic_inline_input'])}
391391
elif isinstance(inp.value, str):
392392
in_dict[inp.name] = inp.value # Obviously strings are serializable
393393
elif isinstance(inp.value, yaml.YAMLObject):

src/wic/ast.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -287,12 +287,12 @@ def python_script_generate_cwl(yaml_tree_tuple: YamlTree,
287287
yml_args = copy.deepcopy(steps[i][step_key]['in'])
288288
python_script_path = yml_args.get('script', '')
289289
if isinstance(python_script_path, dict) and 'wic_inline_input' in python_script_path:
290-
python_script_path = python_script_path['wic_inline_input']['key']
290+
python_script_path = python_script_path['wic_inline_input']
291291
# NOTE: The existence of the script: tag should now be guaranteed in the schema
292292
del yml_args['script']
293293
python_script_docker_pull = yml_args.get('dockerPull', '') # Optional
294294
if isinstance(python_script_docker_pull, dict) and 'wic_inline_input' in python_script_docker_pull:
295-
python_script_docker_pull = python_script_docker_pull['wic_inline_input']['key']
295+
python_script_docker_pull = python_script_docker_pull['wic_inline_input']
296296
if 'dockerPull' in yml_args:
297297
del yml_args['dockerPull']
298298
del steps[i][step_key]['in']['dockerPull']

src/wic/compiler.py

+20 -22
Original file line number | Diff line number | Diff line change
@@ -451,8 +451,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
451451
out_key = keys[0]
452452
out_val = out_val[out_key]
453453
if isinstance(out_val, Dict) and 'wic_anchor' in out_val:
454-
out_val = out_val['wic_anchor']
455-
edgedef = out_val['key']
454+
edgedef = out_val['wic_anchor']
456455

457456
# NOTE: There can only be one definition, but multiple call sites.
458457
if not explicit_edge_defs_copy.get(edgedef):
@@ -469,13 +468,12 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
469468
# Extract input value into separate yml file
470469
# Replace it here with a new variable name
471470
arg_val = steps[i][step_key]['in'][arg_key]
471+
472472
# Convert native YAML to a JSON-encoded string for specific tags.
473473
tags = ['config']
474474
if arg_key in tags and isinstance(arg_val, Dict) and ('wic_inline_input' in arg_val):
475-
# Do NOT wrap config: in {'source': ...}
476-
arg_val = {'wic_inline_input': {'key': json.dumps(arg_val['wic_inline_input']['key'])}}
477-
elif isinstance(arg_val, str):
478-
arg_val = {'source': arg_val}
475+
arg_val = {'wic_inline_input': json.dumps(arg_val['wic_inline_input'])}
476+
479477
# Use triple underscore for namespacing so we can split later
480478
in_name = f'{step_name_i}___{arg_key}' # {step_name_i}_input___{arg_key}
481479

@@ -495,21 +493,21 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
495493
# NOTE: Exclude cwl_watcher from explicit edge dereferences.
496494
# Since cwl_watcher requires explicit filenames for globbing,
497495
# we do not want to replace them with internal CWL dependencies!
498-
if not explicit_edge_defs_copy.get(arg_val['key']):
496+
if not explicit_edge_defs_copy.get(arg_val):
499497
if is_root and not testing:
500498
# Even if is_root, we don't want to raise an Exception
501499
# here because in test_cwl_embedding_independence, we
502500
# recompile all subworkflows as if they were root. That
503501
# will cause this code path to be taken but it is not
504502
# actually an error. Add a CWL input for testing only.
505-
raise Exception(f"Error! No definition found for &{arg_val['key']}!")
503+
raise Exception(f"Error! No definition found for &{arg_val}!")
506504
inputs_workflow.update({in_name: in_dict})
507505
steps[i][step_key]['in'][arg_key] = {'source': in_name}
508506
# Add a 'dummy' value to explicit_edge_calls anyway, because
509507
# that determines sub_args_provided when the recursion returns.
510508
explicit_edge_calls_copy.update({in_name: (namespaces + [step_name_i], arg_key)})
511509
else:
512-
(nss_def_init, var) = explicit_edge_defs_copy[arg_val['key']]
510+
(nss_def_init, var) = explicit_edge_defs_copy[arg_val]
513511

514512
nss_def_embedded = var.split('___')[:-1]
515513
nss_call_embedded = arg_key.split('___')[:-1]
@@ -546,7 +544,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
546544
elif len(nss_call_tails) > 1:
547545
inputs_workflow.update({in_name: in_dict})
548546
# Store explicit edge call site info up through the recursion.
549-
d = {in_name: explicit_edge_defs_copy[arg_val['key']]}
547+
d = {in_name: explicit_edge_defs_copy[arg_val]}
550548
# d = {in_name, (namespaces + [step_name_i], var)} # ???
551549
explicit_edge_calls_copy.update(d)
552550
steps[i][step_key]['in'][arg_key] = {'source': in_name}
@@ -601,7 +599,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
601599

602600
utils_graphs.add_graph_edge(args, graph_init, nss_def, nss_call, label, color='blue')
603601
elif isinstance(arg_val, Dict) and 'wic_inline_input' in arg_val:
604-
arg_val = arg_val['wic_inline_input']['key']
602+
arg_val = arg_val['wic_inline_input']
605603

606604
if arg_key in steps[i][step_key].get('scatter', []):
607605
# Promote scattered input types to arrays
@@ -624,6 +622,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
624622
graphdata.nodes.append((input_node_name, attrs))
625623
graphdata.edges.append((input_node_name, step_node_name, {}))
626624
else:
625+
arg_var: str = arg_val
627626
# Leave un-evaluated, i.e. allow the user to inject raw CWL.
628627
# The un-evaluated string should refer to either an inputs: variable
629628
# or an internal CWL dependency, i.e. an output from a previous step.
@@ -638,20 +637,20 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
638637
# (yet) be inlined. Somehow, if they are not marked with
639638
# inlineable: False, test_inline_subworkflows can still pass.
640639
# This Exception will (correctly) cause such inlineing tests to fail.
641-
if arg_val['source'] not in yaml_tree.get('inputs', {}):
640+
if arg_var not in yaml_tree.get('inputs', {}):
642641
if not args.allow_raw_cwl:
643-
print(f"Warning! Did you forget to use !ii before {arg_val['source']} in {yaml_stem}.yml?")
642+
print(f"Warning! Did you forget to use !ii before {arg_var} in {yaml_stem}.yml?")
644643
print('If you want to compile the workflow anyway, use --allow_raw_cwl')
645644
sys.exit(1)
646645

647646
inputs = yaml_tree.get('inputs', {})
648647
unbound_lit_var = 'Error! Unbound literal variable'
649648
if inputs == {}:
650-
raise Exception(f"{unbound_lit_var}{arg_val['source']} not in inputs: tag in {yaml_stem}.yml")
649+
raise Exception(f"{unbound_lit_var}{arg_var} not in inputs: tag in {yaml_stem}.yml")
651650
inputs_dump = yaml.dump({'inputs': inputs})
652-
raise Exception(f"{unbound_lit_var}{arg_val['source']} not in\n{inputs_dump}\nin {yaml_stem}.yml")
651+
raise Exception(f"{unbound_lit_var}{arg_var} not in\n{inputs_dump}\nin {yaml_stem}.yml")
653652

654-
inputs_key_dict = yaml_tree['inputs'][arg_val['source']]
653+
inputs_key_dict = yaml_tree['inputs'][arg_var]
655654
if 'doc' in inputs_key_dict:
656655
inputs_key_dict['doc'] += '\\n' + in_dict.get('doc', '')
657656
else:
@@ -661,12 +660,12 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
661660
else:
662661
inputs_key_dict['label'] = in_dict.get('label', '')
663662

664-
if arg_val['source'] in input_mapping_copy:
665-
input_mapping_copy[arg_val['source']].append(in_name)
663+
if arg_var in input_mapping_copy:
664+
input_mapping_copy[arg_var].append(in_name)
666665
else:
667-
input_mapping_copy[arg_val['source']] = [in_name]
666+
input_mapping_copy[arg_var] = [in_name]
668667
# TODO: We can use un-evaluated variable names for input mapping; no notation for output mapping!
669-
steps[i][step_key]['in'][arg_key] = arg_val # Leave un-evaluated
668+
steps[i][step_key]['in'][arg_key] = {'source': arg_var} # Leave un-evaluated
670669

671670
for arg_key in args_required:
672671
# print('arg_key', arg_key)
@@ -854,8 +853,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
854853
else:
855854
# We cannot store string values as a dict, so use type: ignore
856855
arg_val = in_dict['value']
857-
new_val = arg_val['source'] if isinstance(arg_val, Dict) and 'source' in arg_val else arg_val
858-
new_keyval = {key: new_val}
856+
new_keyval = {key: arg_val}
859857
# else:
860858
# raise Exception(f"Error! Unknown type: {in_dict['type']}")
861859
yaml_inputs.update(new_keyval)

src/wic/inference.py

-2
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,6 @@ def perform_edge_inference(args: argparse.Namespace,
219219
else:
220220
vars_workflow_output_internal.append(f'{step_name_j}/{out_key}')
221221

222-
# arg_val = {'source': f'{step_name_j}/{out_key}'}
223222
arg_val = f'{step_name_j}/{out_key}'
224223
arg_keyval = {arg_key: arg_val}
225224
steps_i = utils_cwl.add_yamldict_keyval_in(steps[i], step_key, arg_keyval)
@@ -332,7 +331,6 @@ def perform_edge_inference(args: argparse.Namespace,
332331
# which should match in the parent workflow.
333332
inputs_workflow.update({in_name: in_dict})
334333

335-
# arg_keyval = {arg_key: {'source': in_name}}
336334
arg_keyval = {arg_key: in_name}
337335
steps_i = utils_cwl.add_yamldict_keyval_in(steps[i], step_key, arg_keyval)
338336
return steps_i

src/wic/input_output.py

+4 -18
Original file line number | Diff line number | Diff line change
@@ -68,20 +68,6 @@ def write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool) -> N
6868
cwl_tree = node_data.compiled_cwl
6969
yaml_inputs = node_data.workflow_inputs_file
7070

71-
# NOTE: As part of the scatter feature we introduced the use of 'source',
72-
# but in some cases (biobb 'config' tag) it is not being removed correctly
73-
# in the compiler, so as a last resort remove it here.
74-
yaml_inputs_no_source = {}
75-
for key, val in yaml_inputs.items():
76-
try:
77-
if isinstance(val, str):
78-
val_dict = json.loads(val)
79-
if 'source' in val_dict:
80-
val = val_dict['source']
81-
except Exception as e:
82-
pass
83-
yaml_inputs_no_source[key] = val
84-
8571
path.mkdir(parents=True, exist_ok=True)
8672
if relative_run_path:
8773
filename_cwl = f'{yaml_stem}.cwl'
@@ -98,7 +84,7 @@ def write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool) -> N
9884
w.write(auto_gen_header)
9985
w.write(''.join(yaml_content))
10086

101-
yaml_content = yaml.dump(yaml_inputs_no_source, sort_keys=False, line_break='\n', indent=2, Dumper=NoAliasDumper)
87+
yaml_content = yaml.dump(yaml_inputs, sort_keys=False, line_break='\n', indent=2, Dumper=NoAliasDumper)
10288
with open(path / filename_yml, mode='w', encoding='utf-8') as inp:
10389
inp.write(auto_gen_header)
10490
inp.write(yaml_content)
@@ -220,10 +206,10 @@ def write_absolute_yaml_tags(args: argparse.Namespace, in_dict_in: Yaml, namespa
220206
# we don't want users' home directories in the yml files.
221207
cachedir_path = Path(args.cachedir).absolute()
222208
# print('setting cachedir_path to', cachedir_path)
223-
in_dict_in['root_workflow_yml_path'] = {'wic_inline_input': {'key': str(Path(args.yaml).parent.absolute())}}
209+
in_dict_in['root_workflow_yml_path'] = {'wic_inline_input': str(Path(args.yaml).parent.absolute())}
224210

225-
in_dict_in['cachedir_path'] = {'wic_inline_input': {'key': str(cachedir_path)}}
226-
in_dict_in['homedir'] = {'wic_inline_input': {'key': args.homedir}}
211+
in_dict_in['cachedir_path'] = {'wic_inline_input': str(cachedir_path)}
212+
in_dict_in['homedir'] = {'wic_inline_input': args.homedir}
227213

228214
# Add a 'dummy' values to explicit_edge_calls, because
229215
# that determines sub_args_provided when the recursion returns.

src/wic/schemas/wic_schema.py

+5 -18
Original file line number | Diff line number | Diff line change
@@ -173,17 +173,11 @@ def cwl_schema(name: str, cwl: Json, id_prefix: str) -> Json:
173173
anytype: Dict[Any, Any] = {}
174174

175175
# See utils_yaml.py
176-
aliasprops = default_schema()
177-
aliasprops['properties'] = {'key': str_nonempty}
178-
aliasprops['required'] = ['key']
179176
alias = default_schema()
180-
alias['properties'] = {'wic_alias': aliasprops} # !*
177+
alias['properties'] = {'wic_alias': str_nonempty} # !*
181178

182-
iiprops = default_schema()
183-
iiprops['properties'] = {'key': anytype}
184-
iiprops['required'] = ['key']
185179
ii = default_schema()
186-
ii['properties'] = {'wic_inline_input': iiprops} # !ii
180+
ii['properties'] = {'wic_inline_input': anytype} # !ii
187181

188182
# required = []
189183
for key, val in cwl['inputs'].items():
@@ -211,11 +205,8 @@ def cwl_schema(name: str, cwl: Json, id_prefix: str) -> Json:
211205
if key == 'config' and name == 'config_tag_mdp':
212206
grompp = config_schemas.get('grompp', {})
213207

214-
iiprops_mdp = default_schema()
215-
iiprops_mdp['properties'] = {'key': grompp}
216-
iiprops_mdp['required'] = ['key']
217208
ii_mdp = default_schema()
218-
ii_mdp['properties'] = {'wic_inline_input': iiprops_mdp} # !ii
209+
ii_mdp['properties'] = {'wic_inline_input': grompp} # !ii
219210

220211
inputs_props[key] = ii_mdp
221212
continue
@@ -272,10 +263,8 @@ def cwl_schema(name: str, cwl: Json, id_prefix: str) -> Json:
272263
outputs['properties'] = outputs_props
273264

274265
# See utils_yaml.py
275-
anchorprops = default_schema()
276-
anchorprops['properties'] = {'key': str_nonempty}
277266
anchor = default_schema()
278-
anchor['properties'] = {'wic_anchor': anchorprops} # !&
267+
anchor['properties'] = {'wic_anchor': str_nonempty} # !&
279268

280269
keys_anchors: Json = {}
281270
for key in cwl['outputs'].keys():
@@ -461,10 +450,8 @@ def wic_main_schema(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[s
461450
in_schema['properties'] = {'script': str_nonempty}
462451

463452
# See utils_yaml.py
464-
anchorprops = default_schema()
465-
anchorprops['properties'] = {'key': str_nonempty}
466453
anchor = default_schema()
467-
anchor['properties'] = {'wic_anchor': anchorprops} # !&
454+
anchor['properties'] = {'wic_anchor': str_nonempty} # !&
468455

469456
# NOTE: We do not know the specific keys statically, so we have to use str_nonempty
470457
out_schema: Json = {'type': 'array', 'items': {'anyOf': [str_nonempty, anchor]}}

src/wic/utils_yaml.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -8,16 +8,16 @@
88
# because then these constructors will fire again.
99

1010

11-
def anchor_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Dict[str, Any]]:
12-
key = loader.construct_scalar(node)
11+
def anchor_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Any]:
12+
val = loader.construct_scalar(node)
1313
name = 'wic_anchor' # NOT '!&'
14-
return {name: {'key': key}}
14+
return {name: val}
1515

1616

17-
def alias_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Dict[str, Any]]:
18-
key = loader.construct_scalar(node)
17+
def alias_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Any]:
18+
val = loader.construct_scalar(node)
1919
name = 'wic_alias' # NOT '!*'
20-
return {name: {'key': key}}
20+
return {name: val}
2121

2222

2323
def inlineinput_constructor(loader: yaml.SafeLoader, node: yaml.nodes.Node) -> Dict[str, Dict[str, Any]]:
@@ -37,7 +37,7 @@ def inlineinput_constructor(loader: yaml.SafeLoader, node: yaml.nodes.Node) -> D
3737
else:
3838
raise Exception(f'Unknown yaml node type! {node}')
3939
name = 'wic_inline_input' # NOT '!ii'
40-
return {name: {'key': val}}
40+
return {name: val}
4141

4242

4343
def wic_loader() -> Type[yaml.SafeLoader]:

0 commit comments

Comments
 (0)