Update verify tests to use python

allenbaron · allenbaron · commit 75e17fd7ccf2 · 2026-03-16T15:09:14.000-04:00
diff --git a/Makefile b/Makefile
@@ -125,52 +125,6 @@ $(FASTOBO): | build
 		fi ; \
 	fi
 
-# ----------------------------------------
-# FILE UTILITIES
-# ----------------------------------------
-
-# cleans csv files from a directory, optionally matching pattern(s)
-#  --> to prevent existing file inclusion in concat_csv
-# args = input-directory, pattern(s)-to-match-files (should end with .csv)
-define clean_existing_csv
-	@PATTERN=($(2)) ; \
-	if [ "$$PATTERN" ]; then \
-		TMP_FILES=$$(find $(1) -name "$(firstword $(2))" $(patsubst %,-o -name "%",$(wordlist 2,$(words $(2)),$(2)))) ; \
-	else \
-		TMP_FILES=$$(find $(1) -name "*.csv") ; \
-	fi ; \
-	if [ "$$TMP_FILES" ]; then \
-		rm -f $$TMP_FILES ; \
-	fi
-endef
-
-# concatenate multiple CSV files into one
-# args = file category ('TEST' to error, if output), output-file, input-directory, pattern(s)-to-match-files (should end with .csv)
-define concat_csv
-	@PATTERN=($(4)) ; \
-	if [ "$$PATTERN" ]; then \
-		TMP_FILES=$$(find $(3) -name "$(firstword $(4))" $(patsubst %,-o -name "%",$(wordlist 2,$(words $(4)),$(4)))) ; \
-	else \
-		TMP_FILES=$$(find $(3) -name "*.csv") ; \
-	fi ; \
-	if [ "$$TMP_FILES" ]; then \
-		awk 'BEGIN { OFS = FS = "," } ; { \
-			if (FNR == 1) { \
-				gsub(/^.*\/|\.csv/, "", FILENAME) ; \
-				if (NR != 1) { print "" } ; \
-				print "$(1): " FILENAME ; print $$0 \
-			} \
-			else { print $$0 } \
-		}' $$TMP_FILES > $(2) \
-        && rm -f $$TMP_FILES ; \
-		if [ "$(1)" = "TEST" ] ; then \
-			exit 1 ; \
-		fi ; \
-	elif [ "$(1)" = "TEST" ]; then \
-		echo "" > $(2) ; \
-	fi
-endef
-
 
 ##########################################
 ## CI TESTS & DIFF
@@ -218,34 +172,42 @@ build/doid-edit-reasoned.owl: $(EDIT) | check_robot build
 	 --output $@
 	@echo -e "\n## Reasoning completed successfully!"
 
-# Verify doid-edit.owl
+# Verify *-edit.owl
 EDIT_V_QUERIES := $(wildcard src/sparql/verify/edit-verify-*.rq src/sparql/verify/verify-*.rq)
+EDIT_V_RES := $(patsubst src/sparql/verify/%.rq,build/reports/temp/%.csv,$(EDIT_V_QUERIES))
 
 .PRECIOUS: build/reports/edit-verify.csv
 verify-edit: build/reports/edit-verify.csv
 build/reports/edit-verify.csv: $(EDIT) | check_robot build/reports/temp
-	$(call clean_existing_csv,$(word 2,$|),edit-verify-*.csv verify-*.csv)
+	@rm -f $(EDIT_V_RES)
 	@$(ROBOT) verify \
 	 --input $< \
 	 --queries $(EDIT_V_QUERIES) \
 	 --fail-on-violation false \
 	 --output-dir $(word 2,$|)
-	$(call concat_csv,TEST,$@,$(word 2,$|),edit-verify-*.csv verify-*.csv)
+	@python3 src/util/concat_csv.py \
+	 --input $(EDIT_V_RES) \
+	 --category TEST \
+	 --output $@
 
-# Verify of doid-edit.owl that should be run quarterly (not part of release)
+# Verification of *-edit.owl that should be run quarterly (not part of release)
 QUARTER_V_QUERIES := $(wildcard src/sparql/verify/quarter-verify-*.rq)
+QUARTER_V_RES := $(patsubst src/sparql/verify/%.rq,build/reports/temp/%.csv,$(QUARTER_V_QUERIES))
 
 .PRECIOUS: build/reports/quarterly_test.csv
 quarterly_test: build/reports/quarterly_test.csv
 build/reports/quarterly_test.csv: $(EDIT) | check_robot build/reports/temp
 	@echo "Verifying $<..."
-	$(call clean_existing_csv,$(word 2,$|),quarter-verify-*.csv)
+	@rm -f $(QUARTER_V_RES)
 	@$(ROBOT) verify \
 	 --input $< \
 	 --queries $(QUARTER_V_QUERIES) \
 	 --fail-on-violation false \
 	 --output-dir $(word 2,$|)
-	$(call concat_csv,TEST,$@,$(word 2,$|),quarter-verify-*.csv)
+	@python3 src/util/concat_csv.py \
+	 --input $(QUARTER_V_RES) \
+	 --category TEST \
+	 --output $@
 
 # ----------------------------------------
 # DIFF
diff --git a/src/util/concat_csv.py b/src/util/concat_csv.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""
+concat_csv.py
+-------------
+Concatenates multiple CSV files into a single output CSV file.
+
+- Accepts a list of file paths as input; files that do not exist are silently skipped.
+- Prepends each input file's content with a header including the category and file name.
+- Removes the input files after concatenation.
+- Always creates an output file, even if no input files are found.
+- If the category is 'TEST', exits with error code 1 IF any input files are processed.
+
+Usage:
+    python3 concat_csv.py --input [file1 file2 ...] --category <category> --output <output_file>
+
+Example:
+    python3 concat_csv.py --input file1.csv file2.csv --category TEST --output output.csv
+"""
+import sys
+import os
+import csv
+import argparse
+
+
+def concat_csv(category, output_file, input_files):
+    """Concatenate the existing input CSV files into ``output_file``.
+
+    Each existing input file (processed in sorted path order) becomes one
+    section: a ``<category>: <file name without extension>`` header line
+    followed by the file's rows, with a single blank line between sections.
+    The processed input files are deleted afterwards. ``output_file`` is
+    always created; it is empty when none of the input paths exist.
+
+    Args:
+        category: Label used as the prefix of every section header.
+        output_file: Path of the CSV file to create or overwrite.
+        input_files: Iterable of candidate input paths. ``None`` is treated
+            as empty; paths that are not existing files are skipped.
+
+    Raises:
+        SystemExit: With code 1 when ``category`` is ``"TEST"`` and at least
+            one input file was processed.
+    """
+    # `or []` tolerates None, which argparse passes when --input is omitted.
+    files = sorted(f for f in (input_files or []) if os.path.isfile(f))
+    if not files:
+        # No files exist - create empty output and return successfully
+        with open(output_file, "w"):
+            pass
+        return
+    # Files found - concatenate into output
+    last_idx = len(files) - 1
+    with open(output_file, "w", newline="") as out_f:
+        # csv.writer ends rows with "\r\n" by default; force "\n" so rows use
+        # the same line ending as the header and separator lines written
+        # directly to out_f below.
+        writer = csv.writer(out_f, lineterminator="\n")
+        for idx, file in enumerate(files):
+            section = os.path.splitext(os.path.basename(file))[0]
+            out_f.write(f"{category}: {section}\n")
+            with open(file, newline="") as in_f:
+                writer.writerows(csv.reader(in_f))
+            # write single blank line between sections, but not after the last file
+            if idx != last_idx:
+                out_f.write("\n")
+    # Remove input files after concatenation (best effort: warn, don't fail)
+    for file in files:
+        try:
+            os.remove(file)
+        except OSError as e:
+            print(f"Warning: could not remove {file}: {e}", file=sys.stderr)
+    # A TEST report with content signals a failure to the calling process.
+    if category == "TEST":
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options and delegate to concat_csv().
+    parser = argparse.ArgumentParser(
+        description="Concatenate CSV files into a single output file."
+    )
+    parser.add_argument(
+        "--category", required=True, help="Category label for each section header"
+    )
+    parser.add_argument("--output", required=True, help="Output CSV file path")
+    # nargs="*" with an empty-list default lets the script run (and still
+    # create the output file) when --input is omitted or is given no paths,
+    # e.g. when the calling Makefile variable expands to nothing.
+    parser.add_argument(
+        "--input",
+        nargs="*",
+        default=[],
+        metavar="file",
+        help="Input CSV file paths (non-existent files are silently skipped)",
+    )
+    args = parser.parse_args()
+    concat_csv(args.category, args.output, args.input)
diff --git a/src/util/diff-re.py b/src/util/diff-re.py
@@ -139,11 +139,11 @@ def compare_re(old_data, new_data, delim1="\n", delim2="|"):
 
                 result = []
                 if same:
-                    result.append(f"[same]: {'|'.join(map(str, same))}")
+                    result.append(f"[same]: {'|'.join(map(str, sorted(same)))}")
                 if old:
-                    result.append(f"[old]: {'|'.join(map(str, old))}")
+                    result.append(f"[old]: {'|'.join(map(str, sorted(old)))}")
                 if new:
-                    result.append(f"[new]: {'|'.join(map(str, new))}")
+                    result.append(f"[new]: {'|'.join(map(str, sorted(new)))}")
                 entry[col] = delim1.join(result)
 
         # Add ONLY if entry has changes