allow bin width normalization in data/MC plots

alexander-held · alexander-held · commit 997eccccbd22 · 2024-01-15T16:09:44.000+01:00
diff --git a/config_example.yml b/config_example.yml
@@ -8,7 +8,7 @@ Regions:
   - Name: "Signal_region"
     Variable: "jet_pt"
     Filter: "lep_charge > 0"
-    Binning: [200, 300, 400, 500, 600]
+    Binning: [200, 300, 400, 600]
 
 Samples:
   - Name: "Data"
diff --git a/example.py b/example.py
@@ -49,5 +49,8 @@
     cabinetry.tabulate.yields(prediction_postfit, data)
 
     # visualize pre- and post-fit distributions
-    cabinetry.visualize.data_mc(prediction_prefit, data, config=config)
+    plot_options = {"Signal_region": {"normalize_binwidth": (100, "GeV")}}
+    cabinetry.visualize.data_mc(
+        prediction_prefit, data, config=config, plot_options=plot_options
+    )
     cabinetry.visualize.data_mc(prediction_postfit, data, config=config)
diff --git a/src/cabinetry/visualize/__init__.py b/src/cabinetry/visualize/__init__.py
@@ -162,6 +162,7 @@ def data_mc(
     log_scale_x: bool = False,
     channels: Optional[Union[str, List[str]]] = None,
     colors: Optional[Dict[str, str]] = None,
+    plot_options: Optional[Dict[str, dict]] = None,
     close_figure: bool = False,
     save_figure: bool = True,
 ) -> Optional[List[Dict[str, Any]]]:
@@ -189,6 +190,8 @@ def data_mc(
             or list of names to include, defaults to None (uses all channels)
         colors (Optional[Dict[str, str]], optional): map of sample names and colors to
             use in plot, defaults to None (uses default colors)
+        plot_options (Optional[Dict[str, dict]], optional): plotting configuration
+            per region, defaults to None (no additional configuration)
         close_figure (bool, optional): whether to close each figure, defaults to False
             (enable when producing many figures to avoid memory issues, prevents
             automatic rendering in notebooks)
@@ -213,6 +216,8 @@ def data_mc(
                 f"colors need to be provided for all samples, missing for {c_missing}"
             )
 
+    plot_options = plot_options or {}  # no additional plot options by default
+
     # channels to include in plot, with optional filtering applied
     filtered_channels = model_utils._filter_channels(model_prediction.model, channels)
 
@@ -283,6 +288,7 @@ def data_mc(
             log_scale_x=log_scale_x,
             label=label,
             colors=colors,
+            plot_options=plot_options.get(channel_name, None),
             close_figure=close_figure,
         )
         figure_dict_list.append({"figure": fig, "region": channel_name})
diff --git a/src/cabinetry/visualize/plot_model.py b/src/cabinetry/visualize/plot_model.py
@@ -31,6 +31,7 @@ def data_mc(
     log_scale_x: bool = False,
     label: str = "",
     colors: Optional[Dict[str, str]] = None,
+    plot_options: Optional[Dict[str, Any]] = None,
     close_figure: bool = False,
 ) -> mpl.figure.Figure:
     """Draws a data/MC histogram with uncertainty bands and ratio panel.
@@ -51,6 +52,8 @@ def data_mc(
         label (str, optional): label written on the figure, defaults to ""
         colors (Optional[Dict[str, str]], optional): map of sample names and colors to
             use in plot, defaults to None (uses default colors)
+        plot_options (Optional[Dict[str, Any]], optional): plotting configuration for
+            this figure, defaults to None (no additional configuration)
         close_figure (bool, optional): whether to close each figure immediately after
             saving it, defaults to False (enable when producing many figures to avoid
             memory issues, prevents rendering in notebooks)
@@ -61,15 +64,26 @@ def data_mc(
     Returns:
         matplotlib.figure.Figure: the data/MC figure
     """
+    plot_options = plot_options or {}  # no additional plot options by default
+
+    if "normalize_binwidth" in plot_options:
+        rescaling_factor, unit = plot_options["normalize_binwidth"]
+        bin_width_norm = (bin_edges[1:] - bin_edges[:-1]) / rescaling_factor
+    else:
+        unit = None
+        bin_width_norm = np.ones_like(bin_edges[:-1])  # one entry per bin
+
+    total_model_unc = total_model_unc / bin_width_norm  # normalized copy of input
+
     mc_histograms_yields = []
     mc_labels = []
     for h in histogram_dict_list:
         if h["isData"]:
-            data_histogram_yields = h["yields"]
-            data_histogram_stdev = np.sqrt(data_histogram_yields)
+            data_histogram_yields = h["yields"] / bin_width_norm
+            data_histogram_stdev = np.sqrt(h["yields"]) / bin_width_norm
             data_label = h["label"]
         else:
-            mc_histograms_yields.append(h["yields"])
+            mc_histograms_yields.append(h["yields"] / bin_width_norm)
             mc_labels.append(h["label"])
 
     mpl.style.use(MPL_STYLE)
@@ -229,8 +243,12 @@ def data_mc(
         all_containers, all_labels, frameon=False, fontsize="large", loc="upper right"
     )
 
+    vertical_axis_label = "events"
+    if unit is not None:  # bin width normalization
+        vertical_axis_label += f" / {rescaling_factor} {unit}"
+
     ax1.set_xlim(bin_edges[0], bin_edges[-1])
-    ax1.set_ylabel("events")
+    ax1.set_ylabel(vertical_axis_label)
     ax1.set_xticklabels([])
     ax1.set_xticklabels([], minor=True)
     ax1.tick_params(axis="both", which="major", pad=8)  # tick label - axis padding