Skip to content

Commit aa5ddd1

Browse files
committed
replace legacy with schema validator
also change cubids print-metadata-fields to account for JSON file errors due to files not having been validated yet
1 parent c16650b commit aa5ddd1

File tree

2 files changed

+43
-64
lines changed

2 files changed

+43
-64
lines changed

cubids/cubids.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,9 +1336,19 @@ def get_all_metadata_fields(self):
13361336
found_fields = set()
13371337
for json_file in Path(self.path).rglob("*.json"):
13381338
if ".git" not in str(json_file):
1339-
with open(json_file, "r") as jsonr:
1340-
metadata = json.load(jsonr)
1341-
found_fields.update(metadata.keys())
1339+
# add this in case `print-metadata-fields` is run before validate
1340+
try:
1341+
with open(json_file, "r", encoding="utf-8") as jsonr:
1342+
content = jsonr.read().strip()
1343+
if not content:
1344+
print(f"Empty file: {json_file}")
1345+
continue
1346+
metadata = json.loads(content)
1347+
found_fields.update(metadata.keys())
1348+
except json.JSONDecodeError as e:
1349+
print(f"Error decoding JSON in {json_file}: {e}")
1350+
except Exception as e:
1351+
print(f"Unexpected error with file {json_file}: {e}")
13421352
return sorted(found_fields)
13431353

13441354
def remove_metadata_fields(self, fields_to_remove):

cubids/validator.py

Lines changed: 30 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
def build_validator_call(path, ignore_headers=False):
1616
"""Build a subprocess command to the bids validator."""
17-
# build docker call
18-
# CuBIDS automatically ignores subject consistency.
19-
command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"]
17+
# New schema BIDS validator doesn't have option to ignore subject consistency.
18+
# Build the deno command to run the BIDS validator.
19+
command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"]
2020

2121
if ignore_headers:
2222
command.append("--ignoreNiftiHeaders")
@@ -87,32 +87,6 @@ def parse_validator_output(output):
8787
Dataframe of validator output.
8888
"""
8989

90-
def get_nested(dct, *keys):
91-
"""Get a nested value from a dictionary.
92-
93-
Parameters
94-
----------
95-
dct : :obj:`dict`
96-
Dictionary to get value from.
97-
keys : :obj:`list`
98-
List of keys to get value from.
99-
100-
Returns
101-
-------
102-
:obj:`dict`
103-
The nested value.
104-
"""
105-
for key in keys:
106-
try:
107-
dct = dct[key]
108-
except (KeyError, TypeError):
109-
return None
110-
return dct
111-
112-
data = json.loads(output)
113-
114-
issues = data["issues"]
115-
11690
def parse_issue(issue_dict):
11791
"""Parse a single issue from the validator output.
11892
@@ -126,30 +100,27 @@ def parse_issue(issue_dict):
126100
return_dict : :obj:`dict`
127101
Dictionary of parsed issue.
128102
"""
129-
return_dict = {}
130-
return_dict["files"] = [
131-
get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "")
132-
]
133-
return_dict["type"] = issue_dict.get("key", "")
134-
return_dict["severity"] = issue_dict.get("severity", "")
135-
return_dict["description"] = issue_dict.get("reason", "")
136-
return_dict["code"] = issue_dict.get("code", "")
137-
return_dict["url"] = issue_dict.get("helpUrl", "")
138-
139-
return return_dict
140-
141-
df = pd.DataFrame()
142-
143-
for warn in issues["warnings"]:
144-
parsed = parse_issue(warn)
145-
parsed = pd.DataFrame(parsed)
146-
df = pd.concat([df, parsed], ignore_index=True)
147-
148-
for err in issues["errors"]:
149-
parsed = parse_issue(err)
150-
parsed = pd.DataFrame(parsed)
151-
df = pd.concat([df, parsed], ignore_index=True)
103+
return {
104+
"location": issue_dict.get("location", ""),
105+
"code": issue_dict.get("code", ""),
106+
"subCode": issue_dict.get("subCode", ""),
107+
"severity": issue_dict.get("severity", ""),
108+
"rule": issue_dict.get("rule", ""),
109+
}
110+
111+
# Load JSON data
112+
data = json.loads(output)
113+
114+
# Extract issues
115+
issues = data.get("issues", {}).get("issues", [])
116+
if not issues:
117+
return pd.DataFrame(columns=["location", "code", "subCode", "severity", "rule"])
118+
119+
# Parse all issues
120+
parsed_issues = [parse_issue(issue) for issue in issues]
152121

122+
# Convert to DataFrame
123+
df = pd.DataFrame(parsed_issues)
153124
return df
154125

155126

@@ -161,12 +132,10 @@ def get_val_dictionary():
161132
val_dict : dict
162133
Dictionary of values.
163134
"""
164-
val_dict = {}
165-
val_dict["files"] = {"Description": "File with warning orerror"}
166-
val_dict["type"] = {"Description": "BIDS validation warning or error"}
167-
val_dict["severity"] = {"Description": "gravity of problem (warning/error"}
168-
val_dict["description"] = {"Description": "Description of warning/error"}
169-
val_dict["code"] = {"Description": "BIDS validator issue code number"}
170-
val_dict["url"] = {"Description": "Link to the issue's neurostars thread"}
171-
172-
return val_dict
135+
return {
136+
"location": {"Description": "File with the validation issue."},
137+
"code": {"Description": "Code of the validation issue."},
138+
"subCode": {"Description": "Subcode providing additional issue details."},
139+
"severity": {"Description": "Severity of the issue (e.g., warning, error)."},
140+
"rule": {"Description": "Validation rule that triggered the issue."},
141+
}

0 commit comments

Comments
 (0)