Skip to content

Commit 7971200

Browse files
Add reporting table and use isVHSig (#124)
* add reporting utility for v2.4.1 * Update config/file_list.json * remove ZH_Sig and WH_Sig from ALL_TAGS * Update samples and correct cross sections (#126) * Switch ZZ/WZ samples from Sherpa 2.2.2 to 2.2.12 * Reduce cross sections of some Higgs samples * Files in order * fix special sumWeight * move tags to separate file --------- Co-authored-by: Gabriel Rabanal <[email protected]>
1 parent 9881bc1 commit 7971200

11 files changed

+357
-77
lines changed

Manifest.toml

+22-33
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
# This file is machine-generated - editing it directly is not advised
22

3-
julia_version = "1.9.1"
3+
julia_version = "1.9.2"
44
manifest_format = "2.0"
5-
project_hash = "4b9a1481b85740f2967ad8f317dd32906ccc0768"
5+
project_hash = "9a383cbb4bfed3d6d7889f4920aea58c6ffc63bd"
66

77
[[deps.AbstractFFTs]]
88
deps = ["LinearAlgebra"]
9-
git-tree-sha1 = "8bc0aaec0ca548eb6cf5f0d7d16351650c1ee956"
9+
git-tree-sha1 = "cad4c758c0038eea30394b1b671526921ca85b21"
1010
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
11-
version = "1.3.2"
11+
version = "1.4.0"
1212
weakdeps = ["ChainRulesCore"]
1313

1414
[deps.AbstractFFTs.extensions]
@@ -128,11 +128,6 @@ git-tree-sha1 = "5d5dda960067751bc1534aba765f771325044501"
128128
uuid = "000d9b38-65fe-4c81-bdb9-69f01f102479"
129129
version = "1.0.7"
130130

131-
[[deps.BitFlags]]
132-
git-tree-sha1 = "43b1a4a8f797c1cddadf60499a8a077d4af2cd2d"
133-
uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35"
134-
version = "0.1.7"
135-
136131
[[deps.BitIntegers]]
137132
deps = ["Random"]
138133
git-tree-sha1 = "abb894fb55122b4604af0d460d3018e687a60963"
@@ -286,7 +281,7 @@ weakdeps = ["Dates", "LinearAlgebra"]
286281
[[deps.CompilerSupportLibraries_jll]]
287282
deps = ["Artifacts", "Libdl"]
288283
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
289-
version = "1.0.2+0"
284+
version = "1.0.5+0"
290285

291286
[[deps.CompositionsBase]]
292287
git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad"
@@ -409,12 +404,6 @@ git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237"
409404
uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
410405
version = "1.0.4"
411406

412-
[[deps.ExceptionUnwrapping]]
413-
deps = ["Test"]
414-
git-tree-sha1 = "e90caa41f5a86296e014e148ee061bd6c3edec96"
415-
uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4"
416-
version = "0.1.9"
417-
418407
[[deps.Expat_jll]]
419408
deps = ["Artifacts", "JLLWrappers", "Libdl"]
420409
git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c"
@@ -478,9 +467,9 @@ uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
478467

479468
[[deps.FillArrays]]
480469
deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
481-
git-tree-sha1 = "0b3b52afd0f87b0a3f5ada0466352d125c9db458"
470+
git-tree-sha1 = "2250347838b28a108d1967663cba57bfb3c02a58"
482471
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
483-
version = "1.2.1"
472+
version = "1.3.0"
484473

485474
[[deps.FixedPointNumbers]]
486475
deps = ["Statistics"]
@@ -594,10 +583,10 @@ uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe"
594583
version = "1.0.2"
595584

596585
[[deps.HTTP]]
597-
deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"]
598-
git-tree-sha1 = "7f5ef966a02a8fdf3df2ca03108a88447cb3c6f0"
586+
deps = ["Base64", "Dates", "IniFile", "Logging", "MbedTLS", "NetworkOptions", "Sockets", "URIs"]
587+
git-tree-sha1 = "0fa77022fe4b511826b39c894c90daf5fce3334a"
599588
uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
600-
version = "1.9.8"
589+
version = "0.9.17"
601590

602591
[[deps.HarfBuzz_jll]]
603592
deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"]
@@ -657,6 +646,11 @@ git-tree-sha1 = "5cd07aab533df5170988219191dfad0519391428"
657646
uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
658647
version = "0.1.3"
659648

649+
[[deps.IniFile]]
650+
git-tree-sha1 = "f550e6e32074c939295eb5ea6de31849ac2c9625"
651+
uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
652+
version = "0.5.1"
653+
660654
[[deps.InitialValues]]
661655
git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3"
662656
uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c"
@@ -1095,12 +1089,6 @@ deps = ["Artifacts", "Libdl"]
10951089
uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
10961090
version = "0.8.1+0"
10971091

1098-
[[deps.OpenSSL]]
1099-
deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"]
1100-
git-tree-sha1 = "51901a49222b09e3743c65b8847687ae5fc78eb2"
1101-
uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c"
1102-
version = "1.4.1"
1103-
11041092
[[deps.OpenSSL_jll]]
11051093
deps = ["Artifacts", "JLLWrappers", "Libdl"]
11061094
git-tree-sha1 = "1aa4b74f80b01c6bc2b89992b861b5f210e665b5"
@@ -1192,7 +1180,7 @@ version = "0.42.2+0"
11921180
[[deps.Pkg]]
11931181
deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
11941182
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1195-
version = "1.9.0"
1183+
version = "1.9.2"
11961184

11971185
[[deps.PkgVersion]]
11981186
deps = ["Pkg"]
@@ -1410,11 +1398,6 @@ git-tree-sha1 = "d263a08ec505853a5ff1c1ebde2070419e3f28e9"
14101398
uuid = "73760f76-fbc4-59ce-8f25-708e95d2df96"
14111399
version = "0.4.0"
14121400

1413-
[[deps.SimpleBufferStream]]
1414-
git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1"
1415-
uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7"
1416-
version = "1.1.0"
1417-
14181401
[[deps.SimpleTraits]]
14191402
deps = ["InteractiveUtils", "MacroTools"]
14201403
git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231"
@@ -1695,6 +1678,12 @@ path = "WVZPythonExt"
16951678
uuid = "473e61d0-be27-49e9-a959-28cb081f953f"
16961679
version = "0.1.0"
16971680

1681+
[[deps.WVZReportExt]]
1682+
deps = ["FHist", "Measurements", "PrettyTables", "Serialization"]
1683+
path = "WVZReportExt"
1684+
uuid = "6febcfd3-d541-434d-ba4f-f35a3fbd23d7"
1685+
version = "0.1.0"
1686+
16981687
[[deps.WVZXGBoostExt]]
16991688
deps = ["Arrow", "CSV", "CUDA", "DataFrames", "Dates", "ROCCurves", "XGBoost"]
17001689
path = "WVZXGBoostExt"

Project.toml

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
88
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
99
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
1010
ClusterManagers = "34f1f09b-3a8b-5176-ab39-66d58a4d544e"
11+
CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
1112
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
1213
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
1314
FHist = "68837c9b-b678-4cd5-9925-8a54edc8f695"
@@ -24,6 +25,7 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
2425
ThreadsX = "ac1d9e8a-700a-412c-b207-f0111f4b6c0d"
2526
UnROOT = "3cd96dde-e98d-4713-81e9-a4a1b0235ce9"
2627
WVZPythonExt = "473e61d0-be27-49e9-a959-28cb081f953f"
28+
WVZReportExt = "6febcfd3-d541-434d-ba4f-f35a3fbd23d7"
2729
WVZXGBoostExt = "60698fcb-37af-470f-b275-26546941db85"
2830
XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"
2931

WVZReportExt/Project.toml

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
name = "WVZReportExt"
2+
uuid = "6febcfd3-d541-434d-ba4f-f35a3fbd23d7"
3+
authors = ["Moelf <[email protected]>"]
4+
version = "0.1.0"
5+
6+
[deps]
7+
FHist = "68837c9b-b678-4cd5-9925-8a54edc8f695"
8+
Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7"
9+
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
10+
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

WVZReportExt/src/WVZReportExt.jl

+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
module WVZReportExt
2+
3+
export significance_table, print_sigtable
4+
5+
include(joinpath(@__DIR__, "../../src/alltags.jl"))
6+
7+
using PrettyTables, Serialization, Measurements, FHist
8+
9+
"""
10+
rebinscan(S, B; atleast=1, from=:right, by = (s,b) -> s/sqrt(b))
11+
12+
Take two `Hist1D`, try to find the best bin-edges for rebinning, given these two conditions:
13+
14+
- a metric to maximize, `s` and `b` are signal counts and background counts in each new bin
15+
- `atleast` specifcy the minimal number of `s+b` in each newly formed bin
16+
17+
`from` keyword argument can be used to specifcy the direction of the scan, defautls to `:right` assuming
18+
the high-er end of the histogram is signal-like and one of the histograms is monotonic.
19+
20+
The function returns the values of new bin-edges including both ends (of course, the ends are identical to before).
21+
22+
The metric can also be replaced by the more accurate `sqrt(2*((s + b) * log(1 + s/b) - s ))`.
23+
"""
24+
function rebinscan(S, B; atleast=1, from=:right, by = (s,b) -> s/sqrt(b))
    # Both histograms must share the same binning, otherwise per-bin sums are meaningless.
    _binedges = binedges(S)
    _binedges == binedges(B) || error("Bin edges aren't compatible")
    nedges = lastindex(_binedges)

    # When scanning from the right, walk BOTH count vectors in reversed order so that
    # signal and background stay aligned bin-by-bin.
    scan_from_right = from == :right
    Scounts = scan_from_right ? reverse(bincounts(S)) : bincounts(S)
    Bcounts = scan_from_right ? reverse(bincounts(B)) : bincounts(B)

    tempS = tempB = 0.0   # running sums of the merged bin currently being built
    score = -Inf          # metric value from the previous accepted step
    newEdges = [1]        # edge indices in scan order; the first edge is always kept
    for i in 2:nedges     # one more edge than bins
        s = Scounts[i-1]
        b = Bcounts[i-1]
        tempS += s
        tempB += b

        tempS + tempB < atleast && continue # not enough statistics
        newScore = by(tempS, tempB)
        if newScore < score # score starts to drop: close the merged bin before this one
            push!(newEdges, i - 1)
            tempS = s
            tempB = b
            # NOTE(review): the original also assigned `score = -Inf` here, but that store
            # was immediately overwritten by the assignment below, so it had no effect.
            # Behaviour is kept as-is; confirm whether a real reset was intended.
        end
        score = newScore
    end
    push!(newEdges, nedges)

    # Indices collected during a right-to-left scan count edges from the right-hand end.
    # Map them back to positions in `_binedges` and restore ascending order.
    if scan_from_right
        newEdges = reverse!((nedges + 1) .- newEdges)
    end
    return _binedges[newEdges]
end
56+
57+
# Compute the significance `sqrt(2*((S + B) * log(1 + S/B) - S))` from the integrals of
# the `signal` and `bkg` histograms, and attach an uncertainty obtained by propagating
# the histograms' `sumw2`-based errors through the partial derivatives of that formula.
function _significance(signal, bkg)
    nsig = integral(signal)
    nbkg = integral(bkg)
    logratio = log(1 + nsig/nbkg)
    Z = sqrt(2*((nsig + nbkg) * logratio - nsig))

    # Uncertainties on the two yields, taken from the summed squared weights.
    sigma_sig = sqrt(sum(signal.sumw2))
    sigma_bkg = sqrt(sum(bkg.sumw2))

    # Partial derivatives of Z with respect to the signal and background yields.
    dZ_dsig = logratio / Z
    dZ_dbkg = (logratio - nsig/nbkg) / Z

    sigma_Z = sqrt((dZ_dsig * sigma_sig)^2 + (dZ_dbkg * sigma_bkg)^2)
    return Z ± sigma_Z
end
68+
69+
70+
# Load one serialized object per entry of `ALL_TAGS` from `input_dir`
# (file name `<tag>.jlserialize`) and build the histogram matrix from them.
function significance_matrix(input_dir::AbstractString)
    load_tag = tag -> deserialize(joinpath(input_dir,"$(tag).jlserialize"))
    return significance_matrix(map(load_tag, ALL_TAGS))
end
76+
77+
# Build a matrix with one row per element of `Ms` and five columns:
# SFinZ, SFnoZ, DF (BDT histograms) followed by ZZCR and ttZCR (Njet histograms).
# Each histogram is passed through `rebin` with a full bin count as the merge factor.
function significance_matrix(Ms)
    function one_row(M)
        # The three BDT histograms are all rebinned using the DF histogram's bin count.
        nbdt = nbins(M[:DF__BDT__NOMINAL])
        bdt_keys = (:SFinZ__BDT__NOMINAL, :SFnoZ__BDT__NOMINAL, :DF__BDT__NOMINAL)
        row = [rebin(M[key], nbdt) for key in bdt_keys]

        # Control-region histograms are rebinned by their own bin count.
        for key in (:ZZCR__Njet__NOMINAL, :ttZCR__Njet__NOMINAL)
            ncr = nbins(M[key])
            push!(row, rebin(M[key], ncr))
        end

        return permutedims(row)
    end

    return mapreduce(one_row, vcat, Ms)
end
90+
91+
"""
92+
    significance_table(input_dir::AbstractString)
93+
    significance_table(body::Matrix)
94+
95+
# Examples
96+
97+
```julia
98+
julia> M = significance_table(<path>)
99+
14×6 Matrix{Any}:
100+
"Signal" 10.66±0.067 … 1.072±0.017 2.151±0.037
101+
"ZZ" 1219.9±5.4 555.7±2.4 69.19±0.74
102+
"Zjets" -0.019±0.13 -0.0004±0.0004 0.63±0.39
103+
"Zgamma" 0.0±0.0 0.0±0.0 0.0±0.0
104+
"ttbar" 0.0±0.0 0.0±0.0 0.43±0.13
105+
"WZ" 0.358±0.096 … 0.0044±0.0032 0.295±0.061
106+
"tZ" 0.012±0.012 0.0±0.0 0.179±0.043
107+
"ttZ" 1.231±0.08 0.0111±0.0074 73.62±0.61
108+
"tWZ" 0.56±0.11 0.0095±0.0095 14.71±0.56
109+
"VBS" 10.231±0.085 1.478±0.033 0.833±0.024
110+
"VH" 1.29±0.71 … 0.0±0.0 0.0±0.0
111+
"Others" 0.0568±0.0088 0.0021±0.0017 5.02±0.088
112+
"Bkg Tot." 1233.6±5.4 557.2±2.4 164.9±1.2
113+
"Significance" 0.3031±0.002 0.0454±0.00072 0.1672±0.0029
114+
```
115+
"""
116+
# Convenience method: build the histogram matrix from the files in `input_dir`
# and turn it into the table.
function significance_table(input_dir::AbstractString)
    return significance_table(significance_matrix(input_dir))
end
120+
121+
# Turn a matrix of histograms (first row = signal, remaining rows = backgrounds,
# one column per region) into a table whose first column holds the row labels
# (`ALL_TAGS`, then "Bkg Tot." and "Significance") and whose other columns hold
# `value ± uncertainty` entries.
#
# `recreate` is accepted for backward compatibility; this method does not use it.
function significance_table(body::Matrix; recreate=false)
    total_sig = body[1:1, :] # first row, kept as a 1×N matrix
    # Sum all background histograms column by column (rows 2:end).
    total_bkg = mapreduce(sum, hcat, eachcol(body[2:end, :]))
    sig_errors = _significance.(total_sig, total_bkg)

    # `only` requires every histogram to carry exactly one bin error.
    yields = integral.(body) .± only.(binerrors.(body))
    bkg_yields = integral.(total_bkg) .± only.(binerrors.(total_bkg))
    full_nums = vcat(yields, bkg_yields, sig_errors)

    row_labels = vcat(collect(ALL_TAGS), "Bkg Tot.", "Significance")
    return hcat(row_labels, full_nums)
end
138+
139+
# Cell formatter for `pretty_table`: numbers are rendered as "value ± uncertainty",
# both rounded to two digits; every other cell is passed through unchanged.
const sigtable_fmt = function (v, i, j)
    v isa Number || return v
    central = round(Measurements.value(v); digits=2)
    spread = round(Measurements.uncertainty(v); digits=2)
    return "$(central) ± $(spread)"
end
140+
141+
"""
142+
print_sigtable(full_table; io=stdout)
143+
144+
Take the output of [`significance_table`](@ref) and pretty-print it:
145+
146+
# Example
147+
148+
```julia
149+
julia> M = significance_table(<path>);
150+
151+
julia> print_sigtable(M)
152+
┌──────────────┬────────────────┬───────────────┬──────────────┬──────────────┬───────────────┐
153+
│ │ SF-inZ │ SF-noZ │ DF │ CR-ZZ │ CR-ttZ │
154+
├──────────────┼────────────────┼───────────────┼──────────────┼──────────────┼───────────────┤
155+
│ Signal │ 10.66 ± 0.07 │ 9.31 ± 0.1 │ 10.73 ± 0.14 │ 1.07 ± 0.02 │ 2.15 ± 0.04 │
156+
│ ZZ │ 1219.92 ± 5.38 │ 469.06 ± 2.44 │ 19.78 ± 0.45 │ 555.68 ± 2.4 │ 69.19 ± 0.74 │
157+
│ Zjets │ -0.02 ± 0.13 │ 2.6 ± 2.22 │ 6.47 ± 5.51 │ -0.0 ± 0.0 │ 0.63 ± 0.39 │
158+
│ Zgamma │ 0.0 ± 0.0 │ 0.0 ± 0.0 │ 0.3 ± 0.29 │ 0.0 ± 0.0 │ 0.0 ± 0.0 │
159+
│ ttbar │ 0.0 ± 0.0 │ 0.63 ± 0.18 │ 0.28 ± 0.1 │ 0.0 ± 0.0 │ 0.43 ± 0.13 │
160+
│ WZ │ 0.36 ± 0.1 │ 1.79 ± 0.23 │ 2.24 ± 0.29 │ 0.0 ± 0.0 │ 0.29 ± 0.06 │
161+
│ tZ │ 0.01 ± 0.01 │ 0.07 ± 0.03 │ 0.06 ± 0.02 │ 0.0 ± 0.0 │ 0.18 ± 0.04 │
162+
│ ttZ │ 1.23 ± 0.08 │ 4.71 ± 0.16 │ 5.74 ± 0.18 │ 0.01 ± 0.01 │ 73.62 ± 0.61 │
163+
│ tWZ │ 0.56 ± 0.11 │ 2.16 ± 0.23 │ 2.5 ± 0.24 │ 0.01 ± 0.01 │ 14.71 ± 0.56 │
164+
│ VBS │ 10.23 ± 0.09 │ 6.4 ± 0.08 │ 0.18 ± 0.01 │ 1.48 ± 0.03 │ 0.83 ± 0.02 │
165+
│ VH │ 1.29 ± 0.71 │ 5.76 ± 1.4 │ 5.77 ± 1.29 │ 0.0 ± 0.0 │ 0.0 ± 0.0 │
166+
├──────────────┼────────────────┼───────────────┼──────────────┼──────────────┼───────────────┤
167+
│ Others │ 0.06 ± 0.01 │ 0.4 ± 0.13 │ 0.56 ± 0.08 │ 0.0 ± 0.0 │ 5.02 ± 0.09 │
168+
├──────────────┼────────────────┼───────────────┼──────────────┼──────────────┼───────────────┤
169+
│ Bkg Tot. │ 1233.64 ± 5.43 │ 493.58 ± 3.61 │ 43.89 ± 5.7 │ 557.19 ± 2.4 │ 164.91 ± 1.19 │
170+
│ Significance │ 0.3 ± 0.0 │ 0.42 ± 0.0 │ 1.56 ± 0.1 │ 0.05 ± 0.0 │ 0.17 ± 0.0 │
171+
└──────────────┴────────────────┴───────────────┴──────────────┴──────────────┴───────────────┘
172+
```
173+
"""
174+
function print_sigtable(full_table; io=stdout)
    nrows = size(full_table, 1)
    # Horizontal rules are drawn after the rows that sit three and two from the bottom.
    rule_rows = [nrows - 3, nrows - 2]
    column_titles = ["", "SF-inZ", "SF-noZ", "DF", "CR-ZZ", "CR-ttZ"]
    return pretty_table(
        io,
        full_table;
        header = column_titles,
        formatters = sigtable_fmt,
        body_hlines = rule_rows,
        highlighters = hl_row([1], crayon"bold"),  # first body row in bold
        alignment = :c,
    )
end
182+
183+
end # module WVZReportExt

WVZXGBoostExt/src/WVZXGBoostExt.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ using Dates, ROCCurves, Arrow, DataFrames, CSV
44
import XGBoost
55
using XGBoost: DMatrix
66

7-
const ALL_TAGS = ["Signal", "ZZ", "Zjets", "Zgamma", "ttbar", "WZ", "tZ", "ttZ", "tWZ", "VBS", "VH", "Others"];
7+
const ALL_TAGS = ["Signal", "ZZ", "Zjets", "ttbar", "WZ", "tZ", "ttZ", "tWZ", "VBS", "VH", "Others"];
88

99
const useful_features = [
1010
:leptonic_HT, :MET, :Zlep1_dphi, :Wlep1_pt, :total_HT, :Zlep2_dphi, :Zlep2_eta,

0 commit comments

Comments
 (0)