Skip to content

Commit aa5ddd1

Browse files
committed
replace legacy with schema validator
also change cubids print-metadata-fields to account for JSON file errors due to files not having been validated yet
1 parent c16650b commit aa5ddd1

File tree

2 files changed

+43
-64
lines changed

2 files changed

+43
-64
lines changed

cubids/cubids.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,9 +1336,19 @@ def get_all_metadata_fields(self):
13361336
found_fields = set()
13371337
for json_file in Path(self.path).rglob("*.json"):
13381338
if ".git" not in str(json_file):
1339-
with open(json_file, "r") as jsonr:
1340-
metadata = json.load(jsonr)
1341-
found_fields.update(metadata.keys())
1339+
# add this in case `print-metadata-fields` is run before validate
1340+
try:
1341+
with open(json_file, "r", encoding="utf-8") as jsonr:
1342+
content = jsonr.read().strip()
1343+
if not content:
1344+
print(f"Empty file: {json_file}")
1345+
continue
1346+
metadata = json.loads(content)
1347+
found_fields.update(metadata.keys())
1348+
except json.JSONDecodeError as e:
1349+
print(f"Error decoding JSON in {json_file}: {e}")
1350+
except Exception as e:
1351+
print(f"Unexpected error with file {json_file}: {e}")
13421352
return sorted(found_fields)
13431353

13441354
def remove_metadata_fields(self, fields_to_remove):

cubids/validator.py

Lines changed: 30 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
def build_validator_call(path, ignore_headers=False):
1616
"""Build a subprocess command to the bids validator."""
17-
# build docker call
18-
# CuBIDS automatically ignores subject consistency.
19-
command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"]
17+
# New schema BIDS validator doesn't have option to ignore subject consistency.
18+
# Build the deno command to run the BIDS validator.
19+
command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"]
2020

2121
if ignore_headers:
2222
command.append("--ignoreNiftiHeaders")
@@ -87,32 +87,6 @@ def parse_validator_output(output):
8787
Dataframe of validator output.
8888
"""
8989

90-
def get_nested(dct, *keys):
91-
"""Get a nested value from a dictionary.
92-
93-
Parameters
94-
----------
95-
dct : :obj:`dict`
96-
Dictionary to get value from.
97-
keys : :obj:`list`
98-
List of keys to get value from.
99-
100-
Returns
101-
-------
102-
:obj:`dict`
103-
The nested value.
104-
"""
105-
for key in keys:
106-
try:
107-
dct = dct[key]
108-
except (KeyError, TypeError):
109-
return None
110-
return dct
111-
112-
data = json.loads(output)
113-
114-
issues = data["issues"]
115-
11690
def parse_issue(issue_dict):
11791
"""Parse a single issue from the validator output.
11892
@@ -126,30 +100,27 @@ def parse_issue(issue_dict):
126100
return_dict : :obj:`dict`
127101
Dictionary of parsed issue.
128102
"""
129-
return_dict = {}
130-
return_dict["files"] = [
131-
get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "")
132-
]
133-
return_dict["type"] = issue_dict.get("key", "")
134-
return_dict["severity"] = issue_dict.get("severity", "")
135-
return_dict["description"] = issue_dict.get("reason", "")
136-
return_dict["code"] = issue_dict.get("code", "")
137-
return_dict["url"] = issue_dict.get("helpUrl", "")
138-
139-
return return_dict
140-
141-
df = pd.DataFrame()
142-
143-
for warn in issues["warnings"]:
144-
parsed = parse_issue(warn)
145-
parsed = pd.DataFrame(parsed)
146-
df = pd.concat([df, parsed], ignore_index=True)
147-
148-
for err in issues["errors"]:
149-
parsed = parse_issue(err)
150-
parsed = pd.DataFrame(parsed)
151-
df = pd.concat([df, parsed], ignore_index=True)
103+
return {
104+
"location": issue_dict.get("location", ""),
105+
"code": issue_dict.get("code", ""),
106+
"subCode": issue_dict.get("subCode", ""),
107+
"severity": issue_dict.get("severity", ""),
108+
"rule": issue_dict.get("rule", ""),
109+
}
110+
111+
# Load JSON data
112+
data = json.loads(output)
113+
114+
# Extract issues
115+
issues = data.get("issues", {}).get("issues", [])
116+
if not issues:
117+
return pd.DataFrame(columns=["location", "code", "subCode", "severity", "rule"])
118+
119+
# Parse all issues
120+
parsed_issues = [parse_issue(issue) for issue in issues]
152121

122+
# Convert to DataFrame
123+
df = pd.DataFrame(parsed_issues)
153124
return df
154125

155126

@@ -161,12 +132,10 @@ def get_val_dictionary():
161132
val_dict : dict
162133
Dictionary of values.
163134
"""
164-
val_dict = {}
165-
val_dict["files"] = {"Description": "File with warning orerror"}
166-
val_dict["type"] = {"Description": "BIDS validation warning or error"}
167-
val_dict["severity"] = {"Description": "gravity of problem (warning/error"}
168-
val_dict["description"] = {"Description": "Description of warning/error"}
169-
val_dict["code"] = {"Description": "BIDS validator issue code number"}
170-
val_dict["url"] = {"Description": "Link to the issue's neurostars thread"}
171-
172-
return val_dict
135+
return {
136+
"location": {"Description": "File with the validation issue."},
137+
"code": {"Description": "Code of the validation issue."},
138+
"subCode": {"Description": "Subcode providing additional issue details."},
139+
"severity": {"Description": "Severity of the issue (e.g., warning, error)."},
140+
"rule": {"Description": "Validation rule that triggered the issue."},
141+
}

0 commit comments

Comments
 (0)