diff --git a/config_example.yml b/config_example.yml
index 681daca3..7e4e8c4b 100644
--- a/config_example.yml
+++ b/config_example.yml
@@ -9,6 +9,10 @@ Regions:
     Variable: "jet_pt"
     Filter: "lep_charge > 0"
     Binning: [200, 300, 400, 500, 600]
+    Rebin:
+      LowerIndex: 0
+      UpperIndex: 4
+      Steps: 2
 
 Samples:
   - Name: "Data"
diff --git a/src/cabinetry/schemas/config.json b/src/cabinetry/schemas/config.json
index 62704b75..6c58c1c1 100644
--- a/src/cabinetry/schemas/config.json
+++ b/src/cabinetry/schemas/config.json
@@ -145,6 +145,10 @@
                     },
                     "uniqueItems": true
                 },
+                "Rebin": {
+                    "description": "rebinning to apply",
+                    "$ref": "#/definitions/rebin_setting"
+                },
                 "Filter": {
                     "description": "selection criteria to apply",
                     "type": "string"
@@ -398,6 +402,28 @@
                 }
             ]
         },
+        "rebin_setting": {
+            "title": "Rebin setting",
+            "$$target": "#/definitions/rebin_setting",
+            "description": "rebin settings for template histograms",
+            "type": "object",
+            "properties": {
+                "LowerIndex": {
+                    "description": "zero-based lower index for histogram",
+                    "type": "integer"
+                },
+                "UpperIndex": {
+                    "description": "zero-based upper index for histogram",
+                    "type": "integer"
+                },
+                "Steps": {
+                    "description": "number of bins to merge",
+                    "type": "integer",
+                    "minimum": 1
+                }
+            },
+            "additionalProperties": false
+        },
         "smoothing_setting": {
             "title": "Smoothing setting",
             "$$target": "#/definitions/smoothing_setting",
diff --git a/src/cabinetry/templates/postprocessor.py b/src/cabinetry/templates/postprocessor.py
index 8cd2156d..cd68b09b 100644
--- a/src/cabinetry/templates/postprocessor.py
+++ b/src/cabinetry/templates/postprocessor.py
@@ -5,6 +5,7 @@
 import pathlib
 from typing import Any, Dict, Literal, Optional
 
+import boost_histogram as bh
 import numpy as np
 
 from cabinetry import configuration
@@ -31,6 +32,39 @@ def _fix_stat_unc(histogram: histo.Histogram, name: str) -> None:
     histogram.stdev = np.nan_to_num(histogram.stdev, nan=0.0)
 
 
+def _rebinning_slice(region: Dict[str, Any]) -> Optional[slice]:
+    """Returns the slice for rebinning a histogram or None otherwise.
+
+    The optional "Rebin" setting of a region may contain "LowerIndex",
+    "UpperIndex" and "Steps". Keys that are not given leave the corresponding
+    part of the slice unset, so the bin range can be restricted without merging
+    bins and vice versa.
+
+    Args:
+        region (Dict[str, Any]): containing all region information
+
+    Raises:
+        ValueError: if the number of bins to merge is smaller than 1
+
+    Returns:
+        Optional[slice]: slice for rebinning or None
+    """
+    rebinning_info = region.get("Rebin", {})
+    if not any(k in rebinning_info for k in ("LowerIndex", "UpperIndex", "Steps")):
+        # no rebinning needed
+        return None
+
+    lower_idx = rebinning_info.get("LowerIndex")
+    upper_idx = rebinning_info.get("UpperIndex")
+    steps = rebinning_info.get("Steps")
+    if steps is None:
+        # no merging requested, only restrict the bin range
+        return slice(lower_idx, upper_idx)
+    if steps < 1:
+        raise ValueError("steps for merging must be at least 1")
+    return slice(lower_idx, upper_idx, bh.rebin(steps))
+
+
 def _apply_353qh_twice(
     variation: histo.Histogram, nominal: histo.Histogram, name: str
 ) -> None:
@@ -91,6 +125,7 @@ def apply_postprocessing(
     histogram: histo.Histogram,
     name: str,
     *,
+    rebinning_slice: Optional[slice] = None,
     smoothing_algorithm: Optional[str] = None,
     nominal_histogram: Optional[histo.Histogram] = None,
 ) -> histo.Histogram:
@@ -103,6 +138,8 @@ def apply_postprocessing(
     Args:
         histogram (cabinetry.histo.Histogram): the histogram to postprocess
         name (str): histogram name for logging
+        rebinning_slice (Optional[slice]): rebinning to apply, defaults to None (no
+            rebinning applied)
         smoothing_algorithm (Optional[str]): name of smoothing algorithm to apply,
             defaults to None (no smoothing done)
         nominal_histogram (Optional[cabinetry.histo.Histogram]): nominal histogram
@@ -113,7 +150,16 @@ def apply_postprocessing(
     """
     # copy histogram to new object to leave it unchanged
     modified_histogram = copy.deepcopy(histogram)
+
+    # apply rebinning
+    if rebinning_slice is not None:
+        modified_histogram = modified_histogram[
+            rebinning_slice
+        ]  # type: ignore[assignment]
+
     _fix_stat_unc(modified_histogram, name)
+
+    # smoothing
     if smoothing_algorithm is not None:
         if smoothing_algorithm == "353QH, twice":
             if nominal_histogram is None:
@@ -121,6 +167,7 @@ def apply_postprocessing(
             _apply_353qh_twice(modified_histogram, nominal_histogram, name)
         else:
             log.warning(f"unknown smoothing algorithm {smoothing_algorithm}")
+
     return modified_histogram
 
 
@@ -162,6 +209,10 @@ def process_template(
         )
         histogram_name = histo.name(region, sample, systematic, template=template)
 
+        # rebinning information from config
+        rebinning_slice = _rebinning_slice(region)
+
+        # smoothing algorithm from config
         smoothing_algorithm = _smoothing_algorithm(region, sample, systematic)
         if smoothing_algorithm is None:
             nominal_histogram = None
@@ -173,9 +224,11 @@ def process_template(
                 histogram_folder, region, sample, {}, modified=False
             )
 
+        log.debug(f"edges before rebinning: {histogram.bins}")
         new_histogram = apply_postprocessing(
             histogram,
             histogram_name,
+            rebinning_slice=rebinning_slice,
             smoothing_algorithm=smoothing_algorithm,
             nominal_histogram=nominal_histogram,
         )
@@ -183,4 +236,6 @@ def process_template(
         new_histo_path = histogram_folder / (histogram_name + "_modified")
         new_histogram.save(new_histo_path)
 
+        log.debug(f"edges after rebinning: {new_histogram.bins}")
+
     return process_template