|
| 1 | +# Compare the content of histograms produced by ttbar_analysis_pipeline with a reference file. |
| 2 | +# A reference file for N_FILES_MAX_PER_SAMPLE=1 is available in directory `reference/`. |
| 3 | + |
| 4 | +from __future__ import annotations |
| 5 | +import argparse |
| 6 | +from collections import defaultdict |
| 7 | +import json |
| 8 | +import numpy as np |
| 9 | +import sys |
| 10 | +import uproot |
| 11 | + |
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the histogram validation script.

    Options:
        --histos      path of the ROOT file to inspect (default: "histograms.root").
        --reference   path of the JSON reference to compare against.
        --dump-json   flag: print the histogram contents as JSON.

    Exactly one of --reference and --dump-json must be supplied.

    Returns:
        The namespace produced by argparse from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--histos",
        default="histograms.root",
        help="ROOT file containing the output histograms. Defaults to './histograms.root'.",
    )

    # The two operating modes exclude each other, and one of them is mandatory.
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--reference",
        help="JSON reference against which histogram contents should be compared",
    )
    mode.add_argument(
        "--dump-json",
        action='store_true',
        help="Print JSON representation of histogram contents to screen",
    )
    return cli.parse_args()
| 19 | + |
| 20 | +# convert uproot file containing only TH1Ds to a corresponding JSON-compatible dict with structure: |
| 21 | +# { "histo1": { "edges": [...], "contents": [...] }, "histo2": { ... }, ... } |
| 22 | +# Only the highest namecycle for every histogram is considered, and cycles are stripped from the histogram names. |
def as_dict(f: uproot.ReadOnlyDirectory) -> dict[str, dict]:
    """Convert an open uproot directory of 1D histograms to a JSON-compatible mapping.

    The result has the structure
    ``{"histo1": {"edges": [...], "contents": [...]}, "histo2": {...}, ...}``.
    Keys are the histogram names with the namecycle suffix removed; each
    histogram is looked up by its cycle-less name.

    Args:
        f: directory whose keys are iterated and whose objects are read.

    Returns:
        Mapping from histogram name to its axis edges and its counts
        (requested with ``flow=True``), both converted to plain lists.

    Raises:
        AssertionError: if an object in the directory is not a TH1 histogram.
    """
    # Strip whatever follows the rightmost ";" of each key; this assumes that
    # the rightmost ";" (if any) introduces a namecycle.
    unique_names = {key.rsplit(";", 1)[0] for key in f}

    result = defaultdict(dict)
    for hist_name in unique_names:
        hist = f[hist_name]
        assert isinstance(hist, uproot.behaviors.TH1.Histogram)
        entry = result[hist_name]
        entry["edges"] = hist.axis().edges().tolist()
        entry["contents"] = hist.counts(flow=True).tolist()
    return result
| 33 | + |
def _same_values(got, expected) -> bool:
    """Return True if both sequences have the same shape and are element-wise close."""
    got_arr = np.asarray(got)
    expected_arr = np.asarray(expected)
    # np.allclose broadcasts its arguments: sequences of different lengths
    # would either raise ValueError or (for a length-1 side) silently match.
    # A shape difference is a plain mismatch, so check it first.
    if got_arr.shape != expected_arr.shape:
        return False
    return bool(np.allclose(got_arr, expected_arr))


def validate(histos: dict, reference: dict) -> dict[str, list[str]]:
    """Compare produced histograms against a reference and collect the differences.

    Every histogram listed in ``reference`` must be present in ``histos`` with
    matching bin edges and matching contents. Histograms that only exist in
    ``histos`` are not examined. Contents of histograms whose name contains
    "pt_res_up" are not compared because they are not stable between runs.

    A different number of edges or bins is reported as a mismatch rather than
    raising an exception.

    Args:
        histos: mapping name -> {"edges": [...], "contents": [...]} to check.
        reference: mapping with the same structure holding the expected values.

    Returns:
        Mapping from histogram name to the list of problems found for it;
        empty when everything matches.
    """
    errors = defaultdict(list)
    for name, ref_h in reference.items():
        if name not in histos:
            errors[name].append("Histogram not found.")
            continue

        h = histos[name]
        if not _same_values(h['edges'], ref_h['edges']):
            errors[name].append(f"Edges do not match:\n\tgot {h['edges']}\n\texpected {ref_h['edges']}")
        # skip checking the contents of these histograms as they are not stable
        contents_depend_on_rng = "pt_res_up" in name
        if not contents_depend_on_rng and not _same_values(h['contents'], ref_h['contents']):
            errors[name].append(f"Contents do not match:\n\tgot {h['contents']}\n\texpected {ref_h['contents']}")

    return errors
| 49 | + |
if __name__ == "__main__":
    # Script entry point: read the histograms, then either dump them as JSON
    # or compare them with the reference and report any difference.
    cli_args = parse_args()
    with uproot.open(cli_args.histos) as root_file:
        produced = as_dict(root_file)

    if cli_args.dump_json:
        print(json.dumps(produced, indent=2, sort_keys=True))
        sys.exit(0)

    with open(cli_args.reference) as ref_file:
        expected = json.load(ref_file)

    print(f"Validating '{cli_args.histos}' against reference '{cli_args.reference}'...")
    problems = validate(histos=produced, reference=expected)
    if problems:
        # One paragraph per histogram, its problems indented below the name.
        for hist_name, messages in problems.items():
            joined = '\n\t'.join(messages)
            print(f"{hist_name}\n\t{joined}")
        # A non-zero exit status signals the failed validation to the caller.
        sys.exit(1)
    else:
        print("All good!")
0 commit comments