Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:

- name: Lint with Ruff
run: |
poetry run ruff check crategen/
poetry run ruff check crategen/ tests/

- name: Type check with Mypy
run: |
Expand All @@ -39,6 +39,6 @@ jobs:
run: |
poetry add pytest pytest-cov pytest-mock

# - name: Run tests
# run: |
# poetry run pytest --cov=crategen
- name: Run tests
run: |
poetry run pytest --cov=crategen
16 changes: 13 additions & 3 deletions crategen/converters/tes_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
class TESConverter(AbstractConverter):

def convert_to_wrroc(self, tes_data):
if not isinstance(tes_data.get("id"), str):
raise ValueError("Invalid id type")
if not isinstance(tes_data.get("name"), str):
raise ValueError("Invalid name type")

# Validate and extract data with defaults
id = tes_data.get("id", "")
name = tes_data.get("name", "")
Expand All @@ -12,7 +17,7 @@ def convert_to_wrroc(self, tes_data):
inputs = tes_data.get("inputs", [])
outputs = tes_data.get("outputs", [])
creation_time = tes_data.get("creation_time", "")
end_time = tes_data.get("logs", [{}])[0].get("end_time", "") # Corrected to fetch from logs
end_time = tes_data.get("logs", [{}])[0].get("end_time", "")

# Convert to WRROC
wrroc_data = {
Expand All @@ -28,6 +33,11 @@ def convert_to_wrroc(self, tes_data):
return wrroc_data

def convert_from_wrroc(self, wrroc_data):
if not isinstance(wrroc_data.get("@id"), str):
raise ValueError("Invalid @id type")
if not isinstance(wrroc_data.get("name"), str):
raise ValueError("Invalid name type")

# Validate and extract data with defaults
id = wrroc_data.get("@id", "")
name = wrroc_data.get("name", "")
Expand All @@ -37,7 +47,7 @@ def convert_from_wrroc(self, wrroc_data):
result_data = wrroc_data.get("result", [])
start_time = wrroc_data.get("startTime", "")
end_time = wrroc_data.get("endTime", "")

# Convert from WRROC to TES
tes_data = {
"id": id,
Expand All @@ -47,6 +57,6 @@ def convert_from_wrroc(self, wrroc_data):
"inputs": [{"url": obj.get("@id", ""), "path": obj.get("name", "")} for obj in object_data],
"outputs": [{"url": res.get("@id", ""), "path": res.get("name", "")} for res in result_data],
"creation_time": start_time,
"logs": [{"end_time": end_time}], # Added to logs
"logs": [{"end_time": end_time}],
}
return tes_data
71 changes: 41 additions & 30 deletions crategen/converters/wes_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,54 @@
class WESConverter(AbstractConverter):

def convert_to_wrroc(self, wes_data):
# Validate and extract data with defaults
run_id = wes_data.get("run_id", "")
name = wes_data.get("run_log", {}).get("name", "")
state = wes_data.get("state", "")
start_time = wes_data.get("run_log", {}).get("start_time", "")
end_time = wes_data.get("run_log", {}).get("end_time", "")
outputs = wes_data.get("outputs", {})

# Convert to WRROC
if "run_id" in wes_data and not isinstance(wes_data["run_id"], str):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should use models for data validation; it's much simpler and considerably more convenient.

raise ValueError("Invalid run_id type")
if "run_log" in wes_data and not isinstance(wes_data["run_log"], dict):
raise ValueError("Invalid run_log type")
if "run_log" in wes_data and "nested" in wes_data["run_log"]:
raise ValueError("Invalid nested structure in run_log")

wrroc_data = {
"@id": run_id,
"name": name,
"status": state,
"startTime": convert_to_iso8601(start_time),
"endTime": convert_to_iso8601(end_time),
"result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in outputs],
"@id": wes_data.get("run_id", ""),
"status": wes_data.get("state", ""),
"result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in wes_data.get("outputs", [])],
}

start_time = convert_to_iso8601(wes_data.get("run_log", {}).get("start_time"))
end_time = convert_to_iso8601(wes_data.get("run_log", {}).get("end_time"))

if start_time:
wrroc_data["startTime"] = start_time
if end_time:
wrroc_data["endTime"] = end_time

if "run_log" in wes_data and "name" in wes_data["run_log"] and wes_data["run_log"]["name"]:
wrroc_data["name"] = wes_data["run_log"]["name"]

return wrroc_data

def convert_from_wrroc(self, wrroc_data):
# Validate and extract data with defaults
run_id = wrroc_data.get("@id", "")
name = wrroc_data.get("name", "")
start_time = wrroc_data.get("startTime", "")
end_time = wrroc_data.get("endTime", "")
state = wrroc_data.get("status", "")
result_data = wrroc_data.get("result", [])

# Convert from WRROC to WES
if "@id" in wrroc_data and not isinstance(wrroc_data["@id"], str):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: use models for data validation.

raise ValueError("Invalid @id type")
if "name" in wrroc_data and not isinstance(wrroc_data["name"], str):
raise ValueError("Invalid name type")
if "nested" in wrroc_data:
raise ValueError("Invalid nested structure")

wes_data = {
"run_id": run_id,
"run_id": wrroc_data.get("@id", ""),
"state": wrroc_data.get("status", ""),
"outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in wrroc_data.get("result", [])],
"run_log": {
"name": name,
"start_time": start_time,
"end_time": end_time,
"start_time": wrroc_data.get("startTime", ""),
"end_time": wrroc_data.get("endTime", ""),
},
"state": state,
"outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in result_data],
}

if "name" in wrroc_data and wrroc_data["name"]:
wes_data["run_log"]["name"] = wrroc_data["name"]

if not wes_data["run_log"]["start_time"] and not wes_data["run_log"]["end_time"] and "name" not in wes_data["run_log"]:
wes_data.pop("run_log")

return wes_data
80 changes: 80 additions & 0 deletions tests/data/input/wrroc_from_tes_full.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"id": "job-0012345",
"state": "COMPLETE",
"name": "test-task",
"description": "A test task with full parameters",
"inputs": [
{
"url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
"path": "/data/file1"
}
],
"outputs": [
{
"path": "/data/outfile",
"url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE",
"type": "FILE"
}
],
"resources": {
"cpu_cores": 4,
"preemptible": false,
"ram_gb": 8,
"disk_gb": 40,
"zones": "us-west-1"
},
"executors": [
{
"image": "ubuntu:20.04",
"command": ["/bin/md5sum", "/data/file1"],
"workdir": "/data/",
"stdin": "/data/file1",
"stdout": "/tmp/stdout.log",
"stderr": "/tmp/stderr.log",
"env": {
"BLASTDB": "/data/GRC38",
"HMMERDB": "/data/hmmer"
}
}
],
"volumes": [
"/vol/A/"
],
"tags": {
"WORKFLOW_ID": "cwl-01234",
"PROJECT_GROUP": "alice-lab"
},
"logs": [
{
"logs": [
{
"start_time": "2024-7-02T15:00:00.000Z",
"end_time": "2024-7-02T16:00:00.000Z",
"stdout": "string",
"stderr": "string",
"exit_code": 0
}
],
"metadata": {
"host": "worker-001",
"slurmm_id": 123456
},
"start_time": "2024-7-02T15:00:00.000Z",
"end_time": "2024-7-02T16:00:00.000Z",
"outputs": [
{
"url": "string",
"path": "string",
"size_bytes": [
"1024"
]
}
],
"system_logs": [
"string"
]
}
],
"creation_time": "2024-7-02T15:00:00.000Z"
}

30 changes: 30 additions & 0 deletions tests/data/input/wrroc_from_tes_minimal.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"id": "task-id",
"name": "test-task",
"description": "test-description",
"executors": [
{
"image": "alpine:latest",
"command": ["sh", "-c", "echo 'Hello, World!' > /output/hello.txt"]
}
],
"inputs": [
{
"url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
"path": "/input/README.md"
}
],
"outputs": [
{
"url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE",
"path": "/output/LICENSE"
}
],
"creation_time": "2024-07-10T14:30:00Z",
"logs": [
{
"end_time": "2024-07-10T15:30:00Z"
}
]
}

18 changes: 18 additions & 0 deletions tests/data/input/wrroc_from_wes_full.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"run_id": "full-run-id",
"run_log": {
"name": "full-run",
"start_time": "2024-07-27T14:30:00Z",
"end_time": "2024-07-27T15:30:00Z"
},
"state": "COMPLETED",
"outputs": [{
"location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
"name": "README.md"
}],
"workflow_log": {
"workflow_id": "workflow-id",
"workflow_type": "CWL",
"workflow_type_version": "v1.0"
}
}
12 changes: 12 additions & 0 deletions tests/data/input/wrroc_from_wes_minimal.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"run_id": "minimal-run-id",
"run_log": {
"name": "minimal-run",
"start_time": "2024-07-27T14:30:00Z"
},
"state": "COMPLETED",
"outputs": [{
"location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
"name": "README.md"
}]
}
20 changes: 20 additions & 0 deletions tests/data/output/wrroc_from_tes_full_output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"@id": "full-task-id",
"name": "full-task",
"description": "This is a full example task",
"instrument": "alpine:3.12",
"object": [
{
"@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
"name": "/input/README.md"
}
],
"result": [
{
"@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE",
"name": "/output/LICENSE"
}
],
"startTime": "2023-07-10T14:30:00Z",
"endTime": "2023-07-10T15:30:00Z"
}
Loading