Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions util/summarize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Make the script executable.

The shebang is present but the file lacks execute permissions.

Run this command to fix:

chmod +x util/summarize.py
🧰 Tools
🪛 Ruff (0.13.3)

1-1: Shebang is present but file is not executable

(EXE001)

🤖 Prompt for AI Agents
In util/summarize.py at line 1 the shebang is present but the file lacks execute
permissions; make the script executable by setting the user (and optionally
group/other) execute bit, e.g. run chmod +x util/summarize.py from the repo root
and verify the permission change with ls -l util/summarize.py.

#
# Read the git history of the `dashboard/dashboard-results.yml` file,
# and generate a table of passing check counts on each date.

import csv
import subprocess
import yaml

from collections import defaultdict
from datetime import datetime


def list_commits(path):
    """Return the dated list of commits that modified a file.

    Runs ``git log --follow --reverse`` for *path* in the current
    working directory and reads one line per commit.

    Args:
        path: Path of the file whose history is listed; passed to
            ``git log`` after ``--``.

    Returns:
        A list of ``[date, commit]`` pairs in the order git emits them
        with ``--reverse``. ``date`` is the author date as ``YYYY-MM-DD``
        and ``commit`` is the full commit hash.

    Raises:
        subprocess.CalledProcessError: If ``git log`` exits non-zero.
    """
    process = subprocess.run(
        [
            "git", "log", "--follow", "--reverse",
            # Request exactly "<hash>\t<YYYY-MM-DD>" per commit. The default
            # human-readable output changes with the user's `log.date` and
            # `format.pretty` settings, which would break date parsing.
            "--format=%H%x09%ad", "--date=short",
            "--", path,
        ],
        check=True,
        text=True,
        stdout=subprocess.PIPE,
    )
    commits = []
    for line in process.stdout.splitlines():
        commit, sep, ymd = line.partition("\t")
        if not sep:
            # Blank or unexpected line: nothing to record.
            continue
        commits.append([ymd, commit])
    return commits


def summarize_run(data):
    """Count passing FP* checks in one parsed dashboard-results document.

    Args:
        data: Parsed dashboard-results YAML. Expected to be a mapping with
            an "ontologies" sequence; each entry may have a "results"
            mapping from check name to a mapping with a "status" key.

    Returns:
        A ``defaultdict(int)`` where "total" is the number of ontology
        entries and each FP* key is the number of ontologies whose status
        for that check is "PASS". Checks that never pass get no key.
        Empty or malformed input (for example an empty file in an old
        commit) gives an empty result instead of raising.
    """
    result = defaultdict(int)
    # An empty YAML document parses to None; treat any non-mapping as "no data".
    if not isinstance(data, dict):
        return result
    # `or []` also covers an "ontologies" key that is present but null.
    for ontology in data.get("ontologies") or []:
        result["total"] += 1
        if not isinstance(ontology, dict):
            continue
        results = ontology.get("results")
        if not isinstance(results, dict):
            continue
        for key, value in results.items():
            if not isinstance(key, str) or not key.startswith("FP"):
                continue
            if isinstance(value, dict) and value.get("status") == "PASS":
                result[key] += 1
    return result
Comment on lines +37 to +55
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add validation for required data structure.

The function accesses data["ontologies"] without validation. If the YAML file is malformed or missing this key in an old commit, the script will crash with a KeyError.

Apply this diff to add validation:

 def summarize_run(data):
     """Given a dashboard-results YAML
     return a dictionary from principles to counts
     of FP* checks that passed,
     and the total number of ontologies."""
     result = defaultdict(int)
+    if "ontologies" not in data:
+        return result
     for ontology in data["ontologies"]:
         result["total"] += 1
         if "results" not in ontology:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def summarize_run(data):
"""Given a dashboard-results YAML
return a dictionary from principles to counts
of FP* checks that passed,
and the total number of ontologies."""
result = defaultdict(int)
for ontology in data["ontologies"]:
result["total"] += 1
if "results" not in ontology:
continue
for key, value in ontology["results"].items():
if not key.startswith("FP"):
continue
if "status" not in value:
continue
if value["status"] != "PASS":
continue
result[key] += 1
return result
def summarize_run(data):
"""Given a dashboard-results YAML
return a dictionary from principles to counts
of FP* checks that passed,
and the total number of ontologies."""
result = defaultdict(int)
if "ontologies" not in data:
return result
for ontology in data["ontologies"]:
result["total"] += 1
if "results" not in ontology:
continue
for key, value in ontology["results"].items():
if not key.startswith("FP"):
continue
if "status" not in value:
continue
if value["status"] != "PASS":
continue
result[key] += 1
return result
🤖 Prompt for AI Agents
In util/summarize.py around lines 37 to 55, the function reads
data["ontologies"] without validating input; add a guard that verifies data is a
dict and contains an "ontologies" key whose value is a list. If the check fails,
either raise a clear ValueError (e.g. "invalid dashboard-results: missing or
non-list 'ontologies'") or return an empty defaultdict(int) so the caller can
handle malformed YAML, then proceed with the existing loop using the validated
value.



def read_yaml(path, commit):
    """Load a file's YAML content as it existed at a given commit.

    The commit hash and file path are joined as ``<commit>:<path>`` and
    passed to ``git show``; its output is parsed with ``yaml.safe_load``
    and the parsed document is returned.
    """
    revision_spec = ":".join((commit, path))
    content = subprocess.check_output(
        ["git", "show", revision_spec],
        text=True,
    )
    return yaml.safe_load(content)


def main():
    """Summarize every committed version of dashboard-results by date.

    Reads each revision of `dashboard/dashboard-results.yml` from the git
    history, counts the passing checks in each one, and writes a
    `summary.tsv` with one row of counts per commit date.

    Raises:
        SystemExit: If git reports no commits for the file, because there
            is nothing to summarize.
    """
    path = "dashboard/dashboard-results.yml"
    commits = list_commits(path)
    rows = []
    for date, commit in commits:
        row = summarize_run(read_yaml(path, commit))
        row["date"] = date
        rows.append(row)

    # With no rows there are no columns to derive; stop with a clear message.
    if not rows:
        raise SystemExit(f"No commits found for {path}")

    # Take the union of keys over all rows, not only the newest row:
    # csv.DictWriter raises ValueError for a row key that is missing from
    # fieldnames, which happens when an older revision passed a check that
    # the newest revision lacks. dict.fromkeys keeps first-seen order so
    # the sort stays deterministic.
    fieldnames = sorted(
        dict.fromkeys(key for row in rows for key in row),
        key=str.casefold,
    )
Comment on lines +83 to +84
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Handle empty commits list.

If no commits are found for the file (e.g., file doesn't exist at path, or git history issue), rows[-1] will raise an IndexError.

Apply this diff to add validation:

     fieldnames = list(rows[-1].keys())
+    if not rows:
+        print(f"No commits found for {path}")
+        return
+    fieldnames = list(rows[-1].keys())
     fieldnames.sort(key=str.casefold)

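Correction: the diff above places the guard after the first use of `rows[-1]`, so it would still raise. The guard must come before that line: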

+    if not rows:
+        print(f"No commits found for {path}")
+        return
+
     fieldnames = list(rows[-1].keys())
     fieldnames.sort(key=str.casefold)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
fieldnames = list(rows[-1].keys())
fieldnames.sort(key=str.casefold)
if not rows:
print(f"No commits found for {path}")
return
fieldnames = list(rows[-1].keys())
fieldnames.sort(key=str.casefold)
🤖 Prompt for AI Agents
In util/summarize.py around lines 83-84, the code assumes rows is non-empty and
uses rows[-1] which will raise IndexError when no commits are found; update the
function to first check if rows is empty and handle that case (e.g., return
early with an empty result or raise a clear ValueError/RuntimeError with a
descriptive message), otherwise proceed to compute fieldnames from rows[-1] and
sort them; ensure callers of this function can handle the new empty-case
behavior.

# Write the header row, then one tab-separated row per commit.
# newline="" keeps the text layer from translating the "\n" line
# terminator chosen below; the csv module recommends it for any file
# object passed to a writer.
with open("summary.tsv", "w", newline="") as f:
    w = csv.DictWriter(
        f, fieldnames=fieldnames, delimiter="\t", lineterminator="\n"
    )
    w.writeheader()
    w.writerows(rows)


# Run the summary only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()