Skip to content

Commit 9473930

Browse files
kvgbshifaw
and
bshifaw
authored
Detect and flatten nested WDL directories (#268)
* Detect and flatten nested WDL directories to make execution compatible with Cromwell. Co-authored-by: bshifaw <[email protected]>
1 parent 66ff06d commit 9473930

File tree

6 files changed

+294
-2
lines changed

6 files changed

+294
-2
lines changed

src/cromshell/submit/command.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import csv
33
import json
44
import logging
5+
import tempfile
56
from datetime import datetime
67
from pathlib import Path, PurePath
78

@@ -61,14 +62,36 @@ class ValidationFailedError(Exception):
6162
default=False,
6263
help="Do not check womtool for validation before submitting.",
6364
)
65+
@click.option(
66+
"--do-not-flatten-wdls",
67+
is_flag=True,
68+
default=False,
69+
help=".",
70+
)
6471
@click.pass_obj
65-
def main(config, wdl, wdl_json, options_json, dependencies_zip, no_validation):
72+
def main(
73+
config,
74+
wdl,
75+
wdl_json,
76+
options_json,
77+
dependencies_zip,
78+
no_validation,
79+
do_not_flatten_wdls,
80+
):
6681
"""Submit a workflow and arguments to the Cromwell Server"""
6782

6883
LOGGER.info("submit")
6984

7085
http_utils.assert_can_communicate_with_server(config=config)
7186

87+
if not do_not_flatten_wdls and io_utils.has_nested_dependencies(wdl):
88+
tempdir = tempfile.TemporaryDirectory(prefix="cromshell_")
89+
90+
LOGGER.info(f"Flattening WDL structure to {tempdir.name}.")
91+
92+
wdl = io_utils.flatten_nested_dependencies(tempdir, wdl)
93+
dependencies_zip = tempdir.name
94+
7295
if no_validation:
7396
LOGGER.info("Skipping WDL validation")
7497
else:

src/cromshell/utilities/io_utils.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33
import re
44
import shutil
5+
import tempfile
56
from contextlib import nullcontext
67
from io import BytesIO
78
from pathlib import Path
@@ -101,6 +102,82 @@ def assert_path_is_not_empty(path: Union[str, Path], description: str) -> None:
101102
raise EOFError(f"ERROR: {description} is empty: {path}.")
102103

103104

105+
def has_nested_dependencies(wdl_path: Union[str, Path]) -> bool:
    """Determine if a WDL has any nested imports.

    An import counts as nested when its double-quoted path contains "../".
    Only lines that begin with ``import`` (no leading whitespace) are
    inspected.

    Args:
        wdl_path: Location of the WDL file to scan.

    Returns:
        True if at least one import path contains "../", otherwise False.
    """

    with open(wdl_path, "r") as rf:
        for line in rf:
            if not line.startswith("import"):
                continue

            m = re.match(r'import "(.+)"', line)
            if m is None:
                # The line begins with "import" but is not a double-quoted
                # import statement (e.g. single quotes, or an identifier such
                # as "important"); there is no path to inspect.
                continue

            if "../" in m.group(1):
                return True

    return False
118+
119+
120+
def get_flattened_filename(
    tempdir: Union[str, Path], wdl_path: Union[str, Path]
) -> Path:
    """Generate hyphen-separated path to use for flattened WDL file path.

    For example:
       tempdir: /path/2/tempdir/ and wdl_path: /dir/path/2/wdl.wdl
       returns: /path/2/tempdir/dir-path-2-wdl.wdl

    Args:
        tempdir: Directory that will hold the flattened file.
        wdl_path: Original location of the WDL.

    Returns:
        Path inside ``tempdir`` whose file name is the parent directory of
        ``wdl_path`` with every "/" replaced by "-" (one leading "-" removed),
        followed by "-" and the original file name.
    """

    p = Path(wdl_path)

    # Replace the separators with hyphens, then drop the hyphen left behind by
    # the leading "/" of an absolute path.
    flat_parent = re.sub("^-", "", re.sub("/", "-", str(p.parent)))

    # Joining through Path (instead of str + "/") lets tempdir be a str or a Path.
    return Path(tempdir) / f"{flat_parent}-{p.name}"
136+
137+
138+
def flatten_nested_dependencies(
    tempdir: tempfile.TemporaryDirectory, wdl_path: str, _seen=None
) -> Path:
    """Flatten a WDL directory structure and rewrite imports accordingly.

    The WDL at ``wdl_path`` is written into ``tempdir`` under its flattened
    (hyphen-separated) name. Each double-quoted local import is rewritten to
    the flattened file name of its target, and every imported WDL is then
    flattened recursively into the same directory.

    For example:
       tempdir: /path/2/tempdir/
       wdl_path: /dir/path/2/wdl.wdl
       returns: /path/2/tempdir/dir-path-2-wdl.wdl

    Args:
        tempdir: Temporary directory object; output goes under ``tempdir.name``.
        wdl_path: Location of the WDL to flatten.
        _seen: Internal set of already-flattened source paths, threaded through
            the recursion. Callers should leave it unset.

    Returns:
        Path of the rewritten WDL inside ``tempdir``.

    Raises:
        OSError: If ``wdl_path`` or a locally imported WDL cannot be opened.
    """

    p = Path(wdl_path)
    wdl_dir = p.parent

    new_wdl_path = get_flattened_filename(tempdir.name, wdl_path)

    # A WDL reachable through several import chains (or through a cycle) is
    # rewritten only once; without this a cyclic import recurses forever.
    if _seen is None:
        _seen = set()
    source_key = p.resolve()
    if source_key in _seen:
        return new_wdl_path
    _seen.add(source_key)

    with open(wdl_path, "r") as rf, open(new_wdl_path, "w") as wf:
        for line in rf:
            m = re.match(r'import "(.+)"', line) if line.startswith("import") else None
            if m is None:
                # Not a double-quoted import statement: copy through untouched.
                wf.write(line)
                continue

            imported_wdl_name = m.group(1)
            if imported_wdl_name.startswith(("http://", "https://")):
                # NOTE(review): URL imports are not local files, so they are
                # left as-is for the server to resolve - confirm this is the
                # desired handling.
                wf.write(line)
                continue

            imported_wdl_path = (wdl_dir / imported_wdl_name).resolve()
            flattened_name = get_flattened_filename(
                tempdir.name, imported_wdl_path
            ).name

            # Literal replacement of the quoted path only: the path must not be
            # interpreted as a regular expression, and the alias that may
            # follow must not be touched.
            import_line = line.replace(
                f'"{imported_wdl_name}"', f'"{flattened_name}"', 1
            )

            if " as " in line:
                wf.write(import_line)
            else:
                # Add an explicit alias based on the original file name so the
                # renamed file is still referred to by the name the rest of
                # the WDL uses. Only a trailing ".wdl" is stripped.
                alias = imported_wdl_path.name
                if alias.endswith(".wdl"):
                    alias = alias[: -len(".wdl")]
                wf.write(f"{import_line.strip()} as {alias}\n")

            flatten_nested_dependencies(tempdir, imported_wdl_path, _seen)

    return new_wdl_path
179+
180+
104181
def open_or_zip(path: Union[str, Path, None]) -> Union[nullcontext, BytesIO, BinaryIO]:
105182
"""Return a context that may be used for reading the contents from the path.
106183

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def local_cromshell_config_json(local_hidden_cromshell_folder):
3131

3232
@pytest.fixture
3333
def test_workflows_path():
# Fixture returning the "workflows/" directory that sits next to this file.
34-
return Path(__file__).joinpath("workflows/")
34+
# Path(__file__) is this file itself, so step up to its directory before
# appending "workflows/".
return Path(__file__).parent.joinpath("workflows/")
3535

3636

3737
@pytest.fixture

tests/unit/test_io_utils.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import csv
22
import io
33
import os
4+
import re
45
import shutil
6+
import tempfile
57
from contextlib import redirect_stdout
68
from pathlib import Path
9+
from tempfile import NamedTemporaryFile
710
from zipfile import ZipFile
811

912
import pytest
@@ -353,6 +356,96 @@ def test_update_all_workflow_database_tsv(
353356
):
354357
assert row[column_to_update] == update_value
355358

359+
@pytest.mark.parametrize(
    "wdl_content, expected_result",
    [
        ('import "other.wdl"', False),  # No nested import
        ('import "../nested/other.wdl"', True),  # Nested import
        ('import "nested/other.wdl"', False),  # Relative path, but not nested
        ("task my_task { command { echo 'Hello, World!' } }", False),  # No import
        (
            'import "../nested/other.wdl"\nimport "nested/another.wdl"',
            True,
        ),  # Multiple imports, one nested
    ],
)
def test_has_nested_dependencies(self, wdl_content, expected_result):
    """Only WDLs with an import path containing "../" are reported as nested."""

    # delete=False keeps the file on disk after the handle is closed so that
    # the function under test can reopen it by path.
    with NamedTemporaryFile(mode="w", delete=False) as temp_file:
        temp_file.write(wdl_content)

    wdl_path = Path(temp_file.name)

    try:
        # Call the function with the temporary file path and compare
        result = io_utils.has_nested_dependencies(wdl_path)
        assert result == expected_result
    finally:
        # Clean up the temporary file even when the assertion fails
        wdl_path.unlink()
387+
388+
@pytest.mark.parametrize(
    "wdl_path, flattened_wdl_file",
    [
        ("/dir/path/2/wdl.wdl", "dir-path-2-wdl.wdl"),
        ("/another/wdl.wdl", "another-wdl.wdl"),
    ],
)
def test_get_flattened_filename(self, wdl_path, flattened_wdl_file):
    """The flattened name is the hyphen-joined WDL path placed under tempdir."""

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Expected location: the flattened file name directly inside tempdir
        expected = Path(scratch_dir).joinpath(flattened_wdl_file)

        # Exercise the function with a str tempdir and a Path wdl_path
        actual = io_utils.get_flattened_filename(scratch_dir, Path(wdl_path))

        assert actual == expected
406+
407+
# Define test cases using @pytest.mark.parametrize
408+
@pytest.mark.parametrize(
    "wdl_path, expected_file_content",
    [
        (
            "wdl_with_imports/helloWorld_with_imports.wdl",
            ["-helloWorld.wdl", "-wdl_with_imports-hello_world_task.wdl"],
        ),
    ],
)
def test_flatten_nested_dependencies(
    self, wdl_path, expected_file_content, test_workflows_path
):
    """Flattening writes the WDL under its flattened name with rewritten imports."""

    abs_wdl_path = test_workflows_path.joinpath(wdl_path)
    abs_wdl_path_str = str(abs_wdl_path.absolute())

    # The function under test needs the TemporaryDirectory object itself (it
    # reads .name), so create it first and use it as a context manager so the
    # directory is removed even when an assertion below fails.
    tempdir = tempfile.TemporaryDirectory()
    with tempdir:
        # Call the function with the simulated tempdir and wdl_path
        result_path = io_utils.flatten_nested_dependencies(
            tempdir=tempdir, wdl_path=abs_wdl_path_str
        )

        # Check if the result matches the expected outcome
        expected_result_path = Path(tempdir.name).joinpath(
            re.sub("^-", "", re.sub("/", "-", str(abs_wdl_path)))
        )
        assert result_path == expected_result_path

        # Read the rewritten WDL once and build the shared flattened prefix
        # (the directory two levels above the WDL file) once.
        flattened_text = result_path.read_text()
        flattened_prefix = re.sub(
            "^-", "", re.sub("/", "-", str(abs_wdl_path.parents[1]))
        )

        # Check if the expected file content is in the result file
        for expected_file_content_line in expected_file_content:
            parsed_line = flattened_prefix + expected_file_content_line
            assert parsed_line in flattened_text
448+
356449
@pytest.fixture
357450
def mock_data_path(self):
358451
return Path(__file__).parent.joinpath("mock_data/")
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Workflow that calls HelloWorldTask from the sibling task file and exposes
# the task's output_file as the workflow output.
# The HelloWorldWorkflow import from the parent directory is not referenced by
# the workflow body below.
# NOTE(review): presumably it exists to exercise nested ("../") import
# handling - confirm.
import "../helloWorld.wdl" as HelloWorldWorkflow
2+
import "hello_world_task.wdl" as helloWorldTask
3+
4+
workflow HelloWorld {
5+
meta {
6+
workflow_description: "echos hello world"
7+
}
8+
parameter_meta {
9+
# Description of inputs:
10+
# Required:
11+
docker: "Docker image in which to run"
12+
# Optional:
13+
mem: "Amount of memory to give to the machine running each task in this workflow."
14+
preemptible_attempts: "Number of times to allow each task in this workflow to be preempted."
15+
disk_space_gb: "Amount of storage disk space (in Gb) to give to each machine running each task in this workflow."
16+
cpu: "Number of CPU cores to give to each machine running each task in this workflow."
17+
boot_disk_size_gb: "Amount of boot disk space (in Gb) to give to each machine running each task in this workflow."
18+
}
19+
String docker
20+
21+
Int? mem
22+
Int? preemptible_attempts
23+
Int? disk_space_gb
24+
Int? cpu
25+
Int? boot_disk_size_gb
26+
27+
# Every workflow input is passed straight through to the task.
call helloWorldTask.HelloWorldTask {
28+
input:
29+
docker = docker,
30+
mem = mem,
31+
preemptible_attempts = preemptible_attempts,
32+
disk_space_gb = disk_space_gb,
33+
cpu = cpu,
34+
boot_disk_size_gb = boot_disk_size_gb
35+
}
36+
37+
output {
38+
File output_file = HelloWorldTask.output_file
39+
}
40+
}
41+
42+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Task that echoes 'Hello World!' and returns the command's stdout as a file.
task HelloWorldTask {
2+
3+
# ------------------------------------------------
4+
# Input args:
5+
6+
# Required:
7+
8+
# Runtime Options:
9+
String docker
10+
Int? mem
11+
Int? preemptible_attempts
12+
Int? disk_space_gb
13+
Int? cpu
14+
Int? boot_disk_size_gb
15+
16+
# ------------------------------------------------
17+
# Process input args:
18+
19+
# ------------------------------------------------
20+
# Get machine settings:
21+
Boolean use_ssd = false
22+
23+
# You may have to change the following two parameter values depending on the task requirements
24+
Int default_ram_mb = 3 * 1024
25+
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
26+
Int default_disk_space_gb = 100
27+
28+
Int default_boot_disk_size_gb = 15
29+
30+
# Mem is in units of GB but our command and memory runtime values are in MB
31+
Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
32+
# command_mem is not referenced by the command section below.
Int command_mem = machine_mem - 1024
33+
34+
# ------------------------------------------------
35+
# Run our command:
36+
command <<<
37+
set -e
38+
echo 'Hello World!'
39+
>>>
40+
41+
# ------------------------------------------------
42+
# NOTE: the runtime section below is commented out, so the machine settings
# declared above (use_ssd, the default_* values, machine_mem) are only
# referenced from commented-out lines.
# Runtime settings:
43+
# runtime {
44+
# docker: docker
45+
# memory: machine_mem + " MB"
46+
# disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
47+
# bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
48+
# preemptible: 0
49+
# cpu: select_first([cpu, 1])
50+
# }
51+
52+
# ------------------------------------------------
53+
# Outputs:
54+
output {
55+
File output_file = stdout()
56+
}
57+
}

0 commit comments

Comments
 (0)