Skip to content

Commit 997eccc

Browse files
allow bin width normalization in data/MC plots
1 parent d5e2241 commit 997eccc

File tree

4 files changed

+32
-5
lines changed

4 files changed

+32
-5
lines changed

config_example.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Regions:
88
- Name: "Signal_region"
99
Variable: "jet_pt"
1010
Filter: "lep_charge > 0"
11-
Binning: [200, 300, 400, 500, 600]
11+
Binning: [200, 300, 400, 600]
1212

1313
Samples:
1414
- Name: "Data"

example.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,8 @@
4949
cabinetry.tabulate.yields(prediction_postfit, data)
5050

5151
# visualize pre- and post-fit distributions
52-
cabinetry.visualize.data_mc(prediction_prefit, data, config=config)
52+
plot_options = {"Signal_region": {"normalize_binwidth": (100, "GeV")}}
53+
cabinetry.visualize.data_mc(
54+
prediction_prefit, data, config=config, plot_options=plot_options
55+
)
5356
cabinetry.visualize.data_mc(prediction_postfit, data, config=config)

src/cabinetry/visualize/__init__.py

+6
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ def data_mc(
162162
log_scale_x: bool = False,
163163
channels: Optional[Union[str, List[str]]] = None,
164164
colors: Optional[Dict[str, str]] = None,
165+
plot_options: Optional[Dict[str, dict]] = None,
165166
close_figure: bool = False,
166167
save_figure: bool = True,
167168
) -> Optional[List[Dict[str, Any]]]:
@@ -189,6 +190,8 @@ def data_mc(
189190
or list of names to include, defaults to None (uses all channels)
190191
colors (Optional[Dict[str, str]], optional): map of sample names and colors to
191192
use in plot, defaults to None (uses default colors)
193+
plot_options (Optional[Dict[str, dict]], optional): plotting configuration
194+
per region, defaults to None (no additional configuration)
192195
close_figure (bool, optional): whether to close each figure, defaults to False
193196
(enable when producing many figures to avoid memory issues, prevents
194197
automatic rendering in notebooks)
@@ -213,6 +216,8 @@ def data_mc(
213216
f"colors need to be provided for all samples, missing for {c_missing}"
214217
)
215218

219+
plot_options = plot_options or {} # no additional plot options by default
220+
216221
# channels to include in plot, with optional filtering applied
217222
filtered_channels = model_utils._filter_channels(model_prediction.model, channels)
218223

@@ -283,6 +288,7 @@ def data_mc(
283288
log_scale_x=log_scale_x,
284289
label=label,
285290
colors=colors,
291+
plot_options=plot_options.get(channel_name, None),
286292
close_figure=close_figure,
287293
)
288294
figure_dict_list.append({"figure": fig, "region": channel_name})

src/cabinetry/visualize/plot_model.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def data_mc(
3131
log_scale_x: bool = False,
3232
label: str = "",
3333
colors: Optional[Dict[str, str]] = None,
34+
plot_options: Optional[Dict[str, Any]] = None,
3435
close_figure: bool = False,
3536
) -> mpl.figure.Figure:
3637
"""Draws a data/MC histogram with uncertainty bands and ratio panel.
@@ -51,6 +52,8 @@ def data_mc(
5152
label (str, optional): label written on the figure, defaults to ""
5253
colors (Optional[Dict[str, str]], optional): map of sample names and colors to
5354
use in plot, defaults to None (uses default colors)
55+
plot_options (Optional[Dict[str, Any]], optional): plotting configuration for
56+
this figure, defaults to None (no additional configuration)
5457
close_figure (bool, optional): whether to close each figure immediately after
5558
saving it, defaults to False (enable when producing many figures to avoid
5659
memory issues, prevents rendering in notebooks)
@@ -61,15 +64,26 @@ def data_mc(
6164
Returns:
6265
matplotlib.figure.Figure: the data/MC figure
6366
"""
67+
plot_options = plot_options or {} # no additional plot options by default
68+
69+
if "normalize_binwidth" in plot_options:
70+
rescaling_factor, unit = plot_options["normalize_binwidth"]
71+
bin_width_norm = (bin_edges[1:] - bin_edges[:-1]) / rescaling_factor
72+
else:
73+
unit = None
74+
bin_width_norm = np.ones_like(bin_edges)
75+
76+
total_model_unc /= bin_width_norm # apply bin width normalization
77+
6478
mc_histograms_yields = []
6579
mc_labels = []
6680
for h in histogram_dict_list:
6781
if h["isData"]:
68-
data_histogram_yields = h["yields"]
82+
data_histogram_yields = h["yields"] / bin_width_norm
6983
data_histogram_stdev = np.sqrt(data_histogram_yields)
7084
data_label = h["label"]
7185
else:
72-
mc_histograms_yields.append(h["yields"])
86+
mc_histograms_yields.append(h["yields"] / bin_width_norm)
7387
mc_labels.append(h["label"])
7488

7589
mpl.style.use(MPL_STYLE)
@@ -229,8 +243,12 @@ def data_mc(
229243
all_containers, all_labels, frameon=False, fontsize="large", loc="upper right"
230244
)
231245

246+
vertical_axis_label = "events"
247+
if unit is not None: # bin width normalization
248+
vertical_axis_label += f" / {rescaling_factor} {unit}"
249+
232250
ax1.set_xlim(bin_edges[0], bin_edges[-1])
233-
ax1.set_ylabel("events")
251+
ax1.set_ylabel(vertical_axis_label)
234252
ax1.set_xticklabels([])
235253
ax1.set_xticklabels([], minor=True)
236254
ax1.tick_params(axis="both", which="major", pad=8) # tick label - axis padding

0 commit comments

Comments
 (0)