Skip to content

Commit 9a0a3db

Browse files
feat: add validation script and histogram references (#170)
* add histogram validation script * add reference counts for various file settings
1 parent 1d45a2e commit 9a0a3db

10 files changed

+65932
-0
lines changed

analyses/cms-open-data-ttbar/reference/histos_100_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_10_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_1_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_200_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_20_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_2_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_50_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_5_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.

analyses/cms-open-data-ttbar/reference/histos_all_file_per_process.json

Lines changed: 7318 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Compare the content of histograms produced by ttbar_analysis_pipeline with a reference file.
2+
# Reference files for several N_FILES_MAX_PER_SAMPLE settings (e.g. N_FILES_MAX_PER_SAMPLE=1) are available in directory `reference/`.
3+
4+
from __future__ import annotations
5+
import argparse
6+
from collections import defaultdict
7+
import json
8+
import numpy as np
9+
import sys
10+
import uproot
11+
12+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the validation script.

    ``--histos`` selects the ROOT file to inspect (default ``histograms.root``).
    Exactly one of ``--reference`` (compare against a JSON reference) or
    ``--dump-json`` (print the histogram contents as JSON) must be given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--histos",
        default="histograms.root",
        help="ROOT file containing the output histograms. Defaults to './histograms.root'.",
    )

    # The two operating modes are mutually exclusive and one of them is mandatory.
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--reference",
        help="JSON reference against which histogram contents should be compared",
    )
    mode.add_argument(
        "--dump-json",
        action='store_true',
        help="Print JSON representation of histogram contents to screen",
    )
    return cli.parse_args()
19+
20+
# convert uproot file containing only TH1Ds to a corresponding JSON-compatible dict with structure:
21+
# { "histo1": { "edges": [...], "contents": [...] }, "histo2": { ... }, ... }
22+
# Only the highest namecycle for every histogram is considered, and cycles are stripped from the histogram names.
23+
def as_dict(f: uproot.ReadOnlyDirectory) -> dict[str, dict]:
    """Convert an uproot directory of 1D histograms into a JSON-compatible dict.

    The returned mapping has the structure
    ``{"histo1": {"edges": [...], "contents": [...]}, "histo2": {...}, ...}``.
    Namecycles are stripped from the keys and each histogram is looked up by its
    cycle-less name, so only one entry per histogram name is produced.

    Args:
        f: open uproot directory whose objects are all histograms.

    Returns:
        A plain dict mapping histogram name to its bin edges and its bin
        contents (including under/overflow, as ``counts(flow=True)`` is used).

    Raises:
        TypeError: if an object in the directory is not a histogram.
    """
    # This assumes that the rightmost ";" (if any) comes before a namecycle.
    names = {key.rsplit(";", 1)[0] for key in f}

    histos: dict[str, dict] = {}
    # Sorted so that the resulting dict has a deterministic key order.
    for name in sorted(names):
        h = f[name]
        # Explicit check rather than `assert`, which is removed under `python -O`.
        if not isinstance(h, uproot.behaviors.TH1.Histogram):
            raise TypeError(
                f"Object '{name}' is not a histogram (got {type(h).__name__})."
            )
        histos[name] = {
            "edges": h.axis().edges().tolist(),
            "contents": h.counts(flow=True).tolist(),
        }
    return histos
33+
34+
def _values_match(got, expected) -> bool:
    """Return True if both sequences have the same shape and numerically close values."""
    got_arr = np.asarray(got)
    expected_arr = np.asarray(expected)
    # np.allclose broadcasts its operands: arrays of different lengths either
    # raise ValueError or (for length-1 arrays) compare equal by accident, so
    # the shapes are compared explicitly first.
    if got_arr.shape != expected_arr.shape:
        return False
    return bool(np.allclose(got_arr, expected_arr))


def validate(histos: dict, reference: dict) -> dict[str, list[str]]:
    """Compare histogram dicts against a reference and collect the mismatches.

    Both arguments map a histogram name to a dict with keys ``"edges"`` and
    ``"contents"``. Every histogram in ``reference`` is looked up in ``histos``;
    histograms present only in ``histos`` are not checked.

    Args:
        histos: histograms to validate.
        reference: expected histograms.

    Returns:
        A mapping from histogram name to the list of error messages found for
        it. The mapping is empty when everything matches.
    """
    errors = defaultdict(list)
    for name, ref_h in reference.items():
        if name not in histos:
            errors[name].append("Histogram not found.")
            continue

        h = histos[name]
        if not _values_match(h['edges'], ref_h['edges']):
            errors[name].append(f"Edges do not match:\n\tgot {h['edges']}\n\texpected {ref_h['edges']}")

        # Skip checking the contents of these histograms as they are not stable.
        contents_depend_on_rng = "pt_res_up" in name
        if not contents_depend_on_rng and not _values_match(h['contents'], ref_h['contents']):
            errors[name].append(f"Contents do not match:\n\tgot {h['contents']}\n\texpected {ref_h['contents']}")

    return errors
49+
50+
if __name__ == "__main__":
    # Script entry point: read the histograms, then either dump them as JSON
    # or compare them against the given reference file.
    cli_args = parse_args()

    with uproot.open(cli_args.histos) as root_file:
        histos = as_dict(root_file)

    # Dump mode: print the histogram contents and stop, no comparison is done.
    if cli_args.dump_json:
        print(json.dumps(histos, indent=2, sort_keys=True))
        sys.exit(0)

    with open(cli_args.reference) as ref_file:
        ref_histos = json.load(ref_file)

    print(f"Validating '{cli_args.histos}' against reference '{cli_args.reference}'...")
    errs = validate(histos=histos, reference=ref_histos)

    # Report every mismatch grouped by histogram and signal failure to the caller.
    if errs:
        for hist_name, messages in errs.items():
            joined = '\n\t'.join(messages)
            print(f"{hist_name}\n\t{joined}")
        sys.exit(1)

    print("All good!")

0 commit comments

Comments
 (0)