elixir-cloud-aai · Karanjot786 · Jul 26, 2024 · Jul 27, 2024 · Aug 1, 2024 · Aug 5, 2024
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -25,7 +25,7 @@ jobs:
 
     - name: Lint with Ruff
       run: |
-        poetry run ruff check crategen/
+        poetry run ruff check crategen/ tests/
 
     - name: Type check with Mypy
       run: |
@@ -39,6 +39,6 @@ jobs:
       run: |
         poetry add pytest pytest-cov pytest-mock
 
-    # - name: Run tests
-    #   run: |
-    #     poetry run pytest --cov=crategen
+    - name: Run tests
+      run: |
+        poetry run pytest --cov=crategen
diff --git a/crategen/converters/tes_converter.py b/crategen/converters/tes_converter.py
@@ -4,6 +4,11 @@
 class TESConverter(AbstractConverter):
 
     def convert_to_wrroc(self, tes_data):
+        if not isinstance(tes_data.get("id"), str):
+            raise ValueError("Invalid id type")
+        if not isinstance(tes_data.get("name"), str):
+            raise ValueError("Invalid name type")
+
         # Validate and extract data with defaults
         id = tes_data.get("id", "")
         name = tes_data.get("name", "")
@@ -12,7 +17,7 @@ def convert_to_wrroc(self, tes_data):
         inputs = tes_data.get("inputs", [])
         outputs = tes_data.get("outputs", [])
         creation_time = tes_data.get("creation_time", "")
-        end_time = tes_data.get("logs", [{}])[0].get("end_time", "")  # Corrected to fetch from logs
+        end_time = tes_data.get("logs", [{}])[0].get("end_time", "")
 
         # Convert to WRROC
         wrroc_data = {
@@ -28,6 +33,11 @@ def convert_to_wrroc(self, tes_data):
         return wrroc_data
 
     def convert_from_wrroc(self, wrroc_data):
+        if not isinstance(wrroc_data.get("@id"), str):
+            raise ValueError("Invalid @id type")
+        if not isinstance(wrroc_data.get("name"), str):
+            raise ValueError("Invalid name type")
+
         # Validate and extract data with defaults
         id = wrroc_data.get("@id", "")
         name = wrroc_data.get("name", "")
@@ -37,7 +47,7 @@ def convert_from_wrroc(self, wrroc_data):
         result_data = wrroc_data.get("result", [])
         start_time = wrroc_data.get("startTime", "")
         end_time = wrroc_data.get("endTime", "")
-
+        
         # Convert from WRROC to TES
         tes_data = {
             "id": id,
@@ -47,6 +57,6 @@ def convert_from_wrroc(self, wrroc_data):
             "inputs": [{"url": obj.get("@id", ""), "path": obj.get("name", "")} for obj in object_data],
             "outputs": [{"url": res.get("@id", ""), "path": res.get("name", "")} for res in result_data],
             "creation_time": start_time,
-            "logs": [{"end_time": end_time}],  # Added to logs
+            "logs": [{"end_time": end_time}],
         }
         return tes_data
diff --git a/crategen/converters/wes_converter.py b/crategen/converters/wes_converter.py
@@ -4,43 +4,54 @@
 class WESConverter(AbstractConverter):
 
     def convert_to_wrroc(self, wes_data):
-        # Validate and extract data with defaults
-        run_id = wes_data.get("run_id", "")
-        name = wes_data.get("run_log", {}).get("name", "")
-        state = wes_data.get("state", "")
-        start_time = wes_data.get("run_log", {}).get("start_time", "")
-        end_time = wes_data.get("run_log", {}).get("end_time", "")
-        outputs = wes_data.get("outputs", {})
-
-        # Convert to WRROC
+        if "run_id" in wes_data and not isinstance(wes_data["run_id"], str):
+            raise ValueError("Invalid run_id type")
+        if "run_log" in wes_data and not isinstance(wes_data["run_log"], dict):
+            raise ValueError("Invalid run_log type")
+        if "run_log" in wes_data and "nested" in wes_data["run_log"]:
+            raise ValueError("Invalid nested structure in run_log")
+
         wrroc_data = {
-            "@id": run_id,
-            "name": name,
-            "status": state,
-            "startTime": convert_to_iso8601(start_time),
-            "endTime": convert_to_iso8601(end_time),
-            "result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in outputs],
+            "@id": wes_data.get("run_id", ""),
+            "status": wes_data.get("state", ""),
+            "result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in wes_data.get("outputs", [])],
         }
+
+        start_time = convert_to_iso8601(wes_data.get("run_log", {}).get("start_time"))
+        end_time = convert_to_iso8601(wes_data.get("run_log", {}).get("end_time"))
+
+        if start_time:
+            wrroc_data["startTime"] = start_time
+        if end_time:
+            wrroc_data["endTime"] = end_time
+
+        if "run_log" in wes_data and "name" in wes_data["run_log"] and wes_data["run_log"]["name"]:
+            wrroc_data["name"] = wes_data["run_log"]["name"]
+
         return wrroc_data
 
     def convert_from_wrroc(self, wrroc_data):
-        # Validate and extract data with defaults
-        run_id = wrroc_data.get("@id", "")
-        name = wrroc_data.get("name", "")
-        start_time = wrroc_data.get("startTime", "")
-        end_time = wrroc_data.get("endTime", "")
-        state = wrroc_data.get("status", "")
-        result_data = wrroc_data.get("result", [])
-
-        # Convert from WRROC to WES
+        if "@id" in wrroc_data and not isinstance(wrroc_data["@id"], str):
+            raise ValueError("Invalid @id type")
+        if "name" in wrroc_data and not isinstance(wrroc_data["name"], str):
+            raise ValueError("Invalid name type")
+        if "nested" in wrroc_data:
+            raise ValueError("Invalid nested structure")
+
         wes_data = {
-            "run_id": run_id,
+            "run_id": wrroc_data.get("@id", ""),
+            "state": wrroc_data.get("status", ""),
+            "outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in wrroc_data.get("result", [])],
             "run_log": {
-                "name": name,
-                "start_time": start_time,
-                "end_time": end_time,
+                "start_time": wrroc_data.get("startTime", ""),
+                "end_time": wrroc_data.get("endTime", ""),
             },
-            "state": state,
-            "outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in result_data],
         }
+
+        if "name" in wrroc_data and wrroc_data["name"]:
+            wes_data["run_log"]["name"] = wrroc_data["name"]
+
+        if not wes_data["run_log"]["start_time"] and not wes_data["run_log"]["end_time"] and "name" not in wes_data["run_log"]:
+            wes_data.pop("run_log")
+
         return wes_data
diff --git a/tests/data/input/wrroc_from_tes_full.json b/tests/data/input/wrroc_from_tes_full.json
@@ -0,0 +1,80 @@
+{
+    "id": "job-0012345",
+    "state": "COMPLETE",
+    "name": "test-task",
+    "description": "A test task with full parameters",
+    "inputs": [
+      {
+        "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
+        "path": "/data/file1"
+      }
+    ],
+    "outputs": [
+      {
+        "path": "/data/outfile",
+        "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE",
+        "type": "FILE"
+      }
+    ],
+    "resources": {
+      "cpu_cores": 4,
+      "preemptible": false,
+      "ram_gb": 8,
+      "disk_gb": 40,
+      "zones": "us-west-1"
+    },
+    "executors": [
+      {
+        "image": "ubuntu:20.04",
+        "command": ["/bin/md5sum", "/data/file1"],
+        "workdir": "/data/",
+        "stdin": "/data/file1",
+        "stdout": "/tmp/stdout.log",
+        "stderr": "/tmp/stderr.log",
+        "env": {
+          "BLASTDB": "/data/GRC38",
+          "HMMERDB": "/data/hmmer"
+        }
+      }
+    ],
+    "volumes": [
+      "/vol/A/"
+    ],
+    "tags": {
+      "WORKFLOW_ID": "cwl-01234",
+      "PROJECT_GROUP": "alice-lab"
+    },
+    "logs": [
+      {
+        "logs": [
+          {
+            "start_time": "2024-07-02T15:00:00.000Z",
+            "end_time": "2024-07-02T16:00:00.000Z",
+            "stdout": "string",
+            "stderr": "string",
+            "exit_code": 0
+          }
+        ],
+        "metadata": {
+          "host": "worker-001",
+          "slurmm_id": 123456
+        },
+        "start_time": "2024-07-02T15:00:00.000Z",
+        "end_time": "2024-07-02T16:00:00.000Z",
+        "outputs": [
+          {
+            "url": "string",
+            "path": "string",
+            "size_bytes": [
+              "1024"
+            ]
+          }
+        ],
+        "system_logs": [
+          "string"
+        ]
+      }
+    ],
+    "creation_time": "2024-07-02T15:00:00.000Z"
+  }
+
diff --git a/tests/data/input/wrroc_from_tes_minimal.json b/tests/data/input/wrroc_from_tes_minimal.json
@@ -0,0 +1,30 @@
+{
+    "id": "task-id",
+    "name": "test-task",
+    "description": "test-description",
+    "executors": [
+      {
+        "image": "alpine:latest",
+        "command": ["sh", "-c", "echo 'Hello, World!' > /output/hello.txt"]
+      }
+    ],
+    "inputs": [
+      {
+        "url": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
+        "path": "/input/README.md"
+      }
+    ],
+    "outputs": [
+      {
+        "url": "https://github.com/elixir-cloud-aai/CrateGen/blob/main/LICENSE",
+        "path": "/output/LICENSE"
+      }
+    ],
+    "creation_time": "2024-07-10T14:30:00Z",
+    "logs": [
+      {
+        "end_time": "2024-07-10T15:30:00Z"
+      }
+    ]
+  }
+
diff --git a/tests/data/input/wrroc_from_wes_full.json b/tests/data/input/wrroc_from_wes_full.json
@@ -0,0 +1,18 @@
+{
+    "run_id": "full-run-id",
+    "run_log": {
+        "name": "full-run",
+        "start_time": "2024-07-27T14:30:00Z",
+        "end_time": "2024-07-27T15:30:00Z"
+    },
+    "state": "COMPLETED",
+    "outputs": [{
+        "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
+        "name": "README.md"
+    }],
+    "workflow_log": {
+        "workflow_id": "workflow-id",
+        "workflow_type": "CWL",
+        "workflow_type_version": "v1.0"
+    }
+}
diff --git a/tests/data/input/wrroc_from_wes_minimal.json b/tests/data/input/wrroc_from_wes_minimal.json
@@ -0,0 +1,12 @@
+{
+    "run_id": "minimal-run-id",
+    "run_log": {
+        "name": "minimal-run",
+        "start_time": "2024-07-27T14:30:00Z"
+    },
+    "state": "COMPLETED",
+    "outputs": [{
+        "location": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
+        "name": "README.md"
+    }]
+}
diff --git a/tests/data/output/wrroc_from_tes_full_output.json b/tests/data/output/wrroc_from_tes_full_output.json
@@ -0,0 +1,20 @@
+{
+    "@id": "full-task-id",
+    "name": "full-task",
+    "description": "This is a full example task",
+    "instrument": "alpine:3.12",
+    "object": [
+        {
+            "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/README.md",
+            "name": "/input/README.md"
+        }
+    ],
+    "result": [
+        {
+            "@id": "https://raw.githubusercontent.com/elixir-cloud-aai/CrateGen/main/LICENSE",
+            "name": "/output/LICENSE"
+        }
+    ],
+    "startTime": "2023-07-10T14:30:00Z",
+    "endTime": "2023-07-10T15:30:00Z"
+}