Skip to content

Commit 5e4eb01

Browse files
2 dependent workflows each composed of 2 independent cmdlinetool
1 parent a8d9fff commit 5e4eb01

File tree

15 files changed

+491
-1
lines changed

15 files changed

+491
-1
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ pi-simulate-v2 = "dirac_cwl_proto.modules.pi_simulate_v2:app"
4444
pi-gather = "dirac_cwl_proto.modules.pi_gather:app"
4545
crypto = "dirac_cwl_proto.modules.crypto:app"
4646
lhcb-app = "dirac_cwl_proto.modules.lhcb_app:app"
47+
random-data-gen = "dirac_cwl_proto.modules.random_data_gen:app"
48+
gaussian-fit = "dirac_cwl_proto.modules.gaussian_fit:app"
4749

4850
[tool.setuptools_scm]
4951

src/dirac_cwl_proto/metadata_models.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,3 +323,32 @@ def post_process(self, job_path: Path):
323323
outputs = glob.glob(str(job_path / "mandelbrot_image*bmp"))
324324
if outputs:
325325
self._store_output("data-merged", outputs[0])
326+
327+
328+
class GaussianFitModel(IMetadataModel):
    """Gaussian Fit metadata model.

    Resolves the two data-file query parameters to input paths and stores
    the ``fit*.txt`` files found after a job under the ``fit-data`` output.
    """

    # Query parameters
    data_file_1: str
    data_file_2: str

    # Input data
    data: Path | None

    def get_input_query(self, input_name: str) -> Path | None:
        """Return the path configured for *input_name*.

        Only ``"data_file_1"`` and ``"data_file_2"`` are recognised; any
        other name yields ``None``.
        """
        if input_name == "data_file_1":
            return Path(self.data_file_1)
        elif input_name == "data_file_2":
            return Path(self.data_file_2)
        return None

    def get_output_query(self, output_name: str) -> Path | None:
        """Return ``self.data`` for the ``"fit-data"`` output when it is set.

        Any other output name, or an unset ``data``, yields ``None``.
        """
        if output_name == "fit-data" and self.data:
            return self.data
        return None

    def post_process(self, job_path: Path | None = None):
        """Post process the outputs of a job.

        Args:
            job_path: Directory in which to look for ``fit*.txt`` files.
                Defaults to ``None`` so that existing zero-argument calls
                keep working; the current working directory is searched
                in that case.
        """
        # Accept the job directory like the other metadata models do, so the
        # search does not depend on the process's working directory.
        pattern = "fit*.txt" if job_path is None else str(job_path / "fit*.txt")
        # Hand over one path at a time: the other visible caller of
        # _store_output passes a single path string, not a list.
        # NOTE(review): _store_output is defined outside this view - confirm
        # it expects a single path.
        for fit_file in sorted(glob.glob(pattern)):
            self._store_output("fit-data", fit_file)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import typer
2+
from rich.console import Console
3+
4+
# Typer application object; ``main`` below is registered on it as a command.
app = typer.Typer()
5+
# Rich console used by ``gaussian_fit`` to print the fit result.
console = Console()
6+
7+
# Shared ``--output`` / ``-o`` option (default "fit.txt"); used as the default value of ``main``'s ``output_data``.
output: str = typer.Option("fit.txt", "--output", "-o", help="Output file")
8+
9+
10+
def gaussian_fit(input_data: str) -> tuple[float, float]:
    """Dummy gaussian fit.

    Estimate the mean and standard deviation of the numbers stored one per
    line in *input_data*, print them on the console and return them.

    Args:
        input_data: Path of a text file holding one number per line.
            Blank lines are ignored.

    Returns:
        ``(mu, sigma)``: the mean and the population standard deviation.

    Raises:
        ValueError: If the file holds no values, or a line is not a number.
    """
    with open(input_data, "r") as f:
        # Parse each value once; skip blank lines such as a trailing newline.
        values = [float(line) for line in f if line.strip()]
    if not values:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError(f"No data points found in {input_data}")

    mu = sum(values) / len(values)
    variance = sum((x - mu) ** 2 for x in values) / len(values)
    # The result is reported as "Std dev", so it must be the square root of
    # the variance rather than the variance itself.
    sigma = variance ** 0.5
    console.print(f"Mean: {mu}, Std dev: {sigma}")
    return mu, sigma
18+
19+
20+
@app.command()
def main(input_data_files: list[str], output_data=output):
    # Documented with comments rather than a docstring on purpose: Typer
    # would surface a docstring as the command's help text.
    #
    # Fit every input file in turn and append one summary line per file to
    # ``output_data``. The output file is reopened in append mode for each
    # input, so it is only touched once the corresponding fit has returned.
    for data_path in input_data_files:
        mean, spread = gaussian_fit(data_path)
        summary = f"{data_path}: Mean: {mean}, Std dev: {spread}\n"
        with open(output_data, "a") as report:
            report.write(summary)


if __name__ == "__main__":
    # Run the Typer application when the module is executed as a script.
    app()
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import random
2+
3+
import typer
4+
from rich.console import Console
5+
6+
# Typer application object; the command defined below is registered on it.
app = typer.Typer()
7+
# Rich console instance (not referenced by the code visible in this module).
console = Console()
8+
9+
10+
@app.command()
def generate_random_data(file_path: str = "data.txt", num_lines: int = 100):
    # Documented with comments rather than a docstring on purpose: Typer
    # would surface a docstring as the command's help text.
    #
    # Draw an integer mean in [1, 10] and an integer standard deviation in
    # [1, 5], write ``num_lines`` Gaussian samples to ``file_path`` (one per
    # line), then echo the parameters that were used.
    with open(file_path, "w") as out_file:
        mean = random.randint(1, 10)
        spread = random.randint(1, 5)
        out_file.writelines(
            f"{random.gauss(mean, spread)}\n" for _ in range(num_lines)
        )
    typer.echo(f"data file: {file_path}, mean: {mean}, std dev: {spread}")


if __name__ == "__main__":
    # Run the Typer application when the module is executed as a script.
    app()

test/test_workflows.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _cleanup():
7171
(
7272
"test/workflows/crypto/rot13.cwl",
7373
["test/workflows/crypto/type_dependencies/job/inputs-crypto_complete.yaml"],
74-
),
74+
),
7575
# Base64 only
7676
(
7777
"test/workflows/crypto/base64.cwl",
@@ -131,6 +131,28 @@ def _cleanup():
131131
"test/workflows/mandelbrot/type_dependencies/job/inputs-mandelbrot_imagemerge.yaml"
132132
],
133133
),
134+
# --- Gaussian fit example ---
135+
# Complete
136+
(
137+
"test/workflows/gaussian_fit/main-workflow.cwl",
138+
[
139+
"test/workflows/gaussian_fit/type_dependencies/production/metadata-gaussian-fit-complete.yaml"
140+
],
141+
),
142+
# Data generation workflow
143+
(
144+
"test/workflows/gaussian_fit/data_generation/data-generation.cwl",
145+
[
146+
"test/workflows/gaussian_fit/type_dependencies/transformation/inputs-data-generation.yaml"
147+
],
148+
),
149+
# Gaussian fit workflow
150+
(
151+
"test/workflows/gaussian_fit/gaussian_fit/gaussian-fit-workflow.cwl",
152+
[
153+
"test/workflows/gaussian_fit/type_dependencies/transformation/inputs-gaussian-fit.yaml"
154+
],
155+
),
134156
],
135157
)
136158
def test_run_job_success(cli_runner, cleanup, cwl_file, inputs):
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Workflow that runs the data-generation tool twice, once per requested
# output file name.
cwlVersion: v1.2
class: Workflow
label: Data Generation Workflow
doc: >
  This workflow generates data using two independent data-generation tools.

# Names of the two data files to produce.
inputs:
  data_file_name_1:
    type: string
  data_file_name_2:
    type: string

# The data file and the log files collected from each generation step.
outputs:
  data1:
    type: File
    outputSource: data-generation-1/data
  data2:
    type: File
    outputSource: data-generation-2/data
  log1:
    type: File[]
    outputSource: data-generation-1/log
  log2:
    type: File[]
    outputSource: data-generation-2/log

# Both steps run the same tool; only the output file name differs.
steps:
  data-generation-1:
    run: data-generation.cwl
    in:
      output_file_name: data_file_name_1
    out:
      - data
      - log

  data-generation-2:
    run: data-generation.cwl
    in:
      output_file_name: data_file_name_2
    out:
      - data
      - log
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Command-line tool wrapping the ``random-data-gen`` command.
cwlVersion: v1.2
class: CommandLineTool
label: Benchmark Data Generation Tool

inputs:
  # Passed to the command as ``--file-path <name>``.
  output_file_name:
    type: string
    default: data.txt
    inputBinding:
      prefix: '--file-path'

outputs:
  # The file whose name was passed through ``output_file_name``.
  data:
    type: File
    outputBinding:
      glob: $(inputs.output_file_name)
  # Any ``*.log`` files found after the run (optional).
  log:
    type: File[]?
    outputBinding:
      glob: '*.log'

baseCommand:
  - random-data-gen
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Workflow that runs the Gaussian fit tool once on each of two data files.
cwlVersion: v1.2
class: Workflow
label: Fit Workflow
doc: >
  This workflow runs Gaussian fit on the data generated by the data-generation workflow.

requirements:
  SubworkflowFeatureRequirement: {}
  # Needed because the outputs below list more than one source.
  MultipleInputFeatureRequirement: {}

inputs:
  data1:
    type: File
  data2:
    type: File

# Each output merges the arrays of both fit steps into one flat list.
outputs:
  fit-data:
    type: File[]
    outputSource: [fit-1/fit-data, fit-2/fit-data]
    linkMerge: merge_flattened
  log:
    type: File[]
    outputSource: [fit-1/log, fit-2/log]
    linkMerge: merge_flattened

# Both steps run the same tool; only the input data file differs.
steps:
  fit-1:
    run: gaussian-fit.cwl
    in:
      data: data1
    out:
      - fit-data
      - log

  fit-2:
    run: gaussian-fit.cwl
    in:
      data: data2
    out:
      - fit-data
      - log
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Command-line tool wrapping the ``gaussian-fit`` command.
cwlVersion: v1.2
class: CommandLineTool
label: Gaussian Fit Tool

inputs:
  # The data file is passed as the first positional argument.
  data:
    type: File
    inputBinding:
      position: 1

outputs:
  # Collected by exact file name.
  fit-data:
    type: File[]
    outputBinding:
      glob:
        - fit.txt
  log:
    type: File[]
    outputBinding:
      glob:
        - fit.log

baseCommand:
  - gaussian-fit
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Top-level workflow: generate two data files, then fit both of them.
cwlVersion: v1.2
class: Workflow
label: Main Workflow
doc: >
  This workflow is composed of two dependent workflows
  each composed of two command line tools:
  - data-generation: produce data
  - fit: run gaussian fit on data

requirements:
  # Both steps below run sub-workflows.
  SubworkflowFeatureRequirement: {}
  MultipleInputFeatureRequirement: {}

# Names of the two data files to generate.
inputs:
  data_file_name_1:
    type: string
    default: data_1.txt
  data_file_name_2:
    type: string
    default: data_2.txt

# Only the outputs of the fit step are exposed.
outputs:
  fit-data:
    type: File[]
    outputSource: [fit/fit-data]
    linkMerge: merge_flattened
  logs:
    type: File[]?
    outputSource: [fit/log]
    linkMerge: merge_flattened

steps:
  data-generation:
    run: ./data_generation/data-generation-workflow.cwl
    in:
      data_file_name_1: data_file_name_1
      data_file_name_2: data_file_name_2
    out:
      - data1
      - data2
      - log1
      - log2

  # Consumes the two data files produced by the data-generation step.
  fit:
    run: ./gaussian_fit/gaussian-fit-workflow.cwl
    in:
      data1: data-generation/data1
      data2: data-generation/data2
    out:
      - fit-data
      - log

0 commit comments

Comments
 (0)