From 255b184c0eab92232ea32cd14fbe93d6aaf69135 Mon Sep 17 00:00:00 2001 From: maly Date: Thu, 20 Feb 2025 16:05:55 +0000 Subject: [PATCH 01/19] some initial implementation of a LightConfig carrying info from pyhf model --- src/cabinetry/model_utils.py | 67 +++++++++++++++++++++++++++++++++- tests/conftest.py | 70 ++++++++++++++++++++++++++++++++++++ tests/test_model_utils.py | 33 ++++++++++++++--- 3 files changed, 164 insertions(+), 6 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index ec6533f8..31d09b77 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -18,11 +18,73 @@ _YIELD_STDEV_CACHE: Dict[Any, Tuple[List[List[List[float]]], List[List[float]]]] = {} +class LightConfig(pyhf.pdf._ModelConfig): + def __init__( + self, + model: pyhf.pdf.Model, + samples_merge_map: Optional[Dict[str, List[str]]] = None, + ): + super().__init__(model.spec) + # self.channel_nbins = model.config.channel_nbins + # self.channels = model.config.channels + # self.channel_slices = model.config.channel_slices + # self.pyhf_model = model # Can this get too big and become a memory issue ? + # self.samples = model.config.samples + if samples_merge_map is not None: + self._update_samples(samples_merge_map) + + @property + def samples(self) -> list[str]: + """ + Ordered list of sample names in the model. + """ + return self._samples + + @samples.setter + def samples(self, samples): + """ + Set the ordered list of sample names in the model. + """ + self._samples = samples + + def _update_samples(self, samples_merge_map: Optional[Dict[str, List[str]]]): + # Delete samples being merged from config + # Flatten all merged samples into a set for O(1) lookups + merged_samples_set = { + merged_sample + for merged_samples_list in samples_merge_map.values() + for merged_sample in merged_samples_list + } + merged_samples_idxs = [ + idx + for idx, sample in enumerate(self.samples) + if sample in merged_samples_set + ] + self.samples = np.delete(self.samples, merged_samples_idxs).tolist() + # Add new samples at the bottom of stack + self.samples = np.insert( + np.array(self.samples, dtype=object), + np.arange(len(samples_merge_map)), + list(samples_merge_map.keys()), + axis=0, + ).tolist() + + +class LightModel: + def __init__( + self, + model: pyhf.pdf.Model, + samples_merge_map: Optional[Dict[str, List[str]]] = None, + ): + self.config = LightConfig(model, samples_merge_map) + + class ModelPrediction(NamedTuple): """Model prediction with yields and total uncertainties per bin and channel. Args: - model (pyhf.pdf.Model): model to which prediction corresponds to + model (LightModel): model (a light-weight version of pyhf.pdf.Model) to + which the prediction corresponds model_yields (List[List[List[float]]]): yields per channel, sample and bin, indices: channel, sample, bin total_stdev_model_bins (List[List[List[float]]]): total yield uncertainty per @@ -420,6 +482,7 @@ def prediction( *, fit_results: Optional[FitResults] = None, label: Optional[str] = None, + samples_merge_map: Optional[Dict[str, List[str]]] = None, ) -> ModelPrediction: """Returns model prediction, including model yields and uncertainties.
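(Usage sketch, for orientation rather than part of the diff: this mirrors the call exercised by the tests added in this patch. `spec` is assumed to be some pyhf workspace specification dict, and "Total Background" is an arbitrary label for the merged sample.)

import pyhf
from cabinetry import model_utils

model = pyhf.Workspace(spec).model()  # spec: a pyhf workspace dict, assumed given
# request that the two background samples be reported as one merged entry
pred = model_utils.prediction(
    model,
    samples_merge_map={"Total Background": ["Background", "Background 2"]},
)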
@@ -440,6 +503,8 @@ def prediction( Returns: ModelPrediction: model, yields and uncertainties per channel, sample, bin """ + light_model = LightModel(model, samples_merge_map) + log.debug(light_model.config.samples) if fit_results is not None: if fit_results.labels != model.config.par_names: log.warning("parameter names in fit results and model do not match") diff --git a/tests/conftest.py b/tests/conftest.py index f77307de..9cda94dd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -234,6 +234,76 @@ def example_spec_with_background(): return spec +@pytest.fixture +def example_spec_with_multiple_background(): + spec = { + "channels": [ + { + "name": "Signal Region", + "samples": [ + { + "data": [50], + "modifiers": [ + { + "data": [5], + "name": "staterror_Signal-Region", + "type": "staterror", + }, + { + "data": None, + "name": "Signal strength", + "type": "normfactor", + }, + ], + "name": "Signal", + }, + { + "data": [150], + "modifiers": [ + { + "data": [7], + "name": "staterror_Signal-Region", + "type": "staterror", + } + ], + "name": "Background", + }, + { + "data": [20], + "modifiers": [ + { + "data": [1], + "name": "staterror_Signal-Region", + "type": "staterror", + }, + { + "data": None, + "name": "Background 2 norm", + "type": "normfactor", + }, + ], + "name": "Background 2", + }, + ], + } + ], + "measurements": [ + { + "config": { + "parameters": [ + {"name": "Signal strength", "bounds": [[0, 10]], "inits": [1.0]} + ], + "poi": "Signal strength", + }, + "name": "signal plus background", + } + ], + "observations": [{"data": [160], "name": "Signal Region"}], + "version": "1.0.0", + } + return spec + + @pytest.fixture def example_spec_no_aux(): spec = { diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index b1369d6d..bd498a5d 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -258,11 +258,12 @@ def test_yield_stdev(example_spec, example_spec_multibin): "cabinetry.model_utils.yield_stdev", side_effect=[ ( - [[[5.0, 2.0], [5.0, 2.0]], [[1.0], [1.0]]], - [[5.38516481, 5.38516481], [1.0, 1.0]], + [[[5.0, 2.0], [5.0, 2.0]], [[1.0], [1.0]]], # pre-fit, per-bin + [[5.38516481, 5.38516481], [1.0, 1.0]], # pre-fit, per-channel ), - ([[[0.3], [0.3]]], [[0.3, 0.3]]), - ([[[0.3], [0.3]]], [[0.3, 0.3]]), + ([[[0.3], [0.3]]], [[0.3, 0.3]]), # post-fit, single-channel nominal + ([[[0.3], [0.3]]], [[0.3, 0.3]]), # post-fit single-channel, custom + ([[[0.3], [0.3]]], [[0.3, 0.3]]), # post-fit single-channel, merged (values?) 
], ) @mock.patch( @@ -278,7 +279,13 @@ def test_yield_stdev(example_spec, example_spec_multibin): ], ) def test_prediction( - mock_asimov, mock_unc, mock_stdev, example_spec_multibin, example_spec, caplog + mock_asimov, + mock_unc, + mock_stdev, + example_spec_multibin, + example_spec, + example_spec_with_multiple_background, + caplog, ): caplog.set_level(logging.DEBUG) model = pyhf.Workspace(example_spec_multibin).model() @@ -360,6 +367,22 @@ def test_prediction( assert model_pred.label == "abc" caplog.clear() + # Test with merging samples + model = pyhf.Workspace(example_spec_with_multiple_background).model() + fit_results = FitResults( + np.asarray([1.1, 1.01, 1.2]), + np.asarray([0.1, 0.03, 0.07]), + ["Signal strength", "staterror_Signal-Region[0]", "Background 2 norm"], + np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), + 0.0, + ) + model_pred = model_utils.prediction( + model, + fit_results=fit_results, + samples_merge_map={"Total Background": ["Background", "Background 2"]}, + ) + caplog.clear() + def test_unconstrained_parameter_count(example_spec, example_spec_shapefactor): model = pyhf.Workspace(example_spec).model() From 942d62c49476595ce739d8be22db107335ae8bf3 Mon Sep 17 00:00:00 2001 From: maly Date: Thu, 20 Feb 2025 16:16:48 +0000 Subject: [PATCH 02/19] fix typing --- src/cabinetry/model_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 31d09b77..c1f9cf89 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -34,7 +34,7 @@ def __init__( self._update_samples(samples_merge_map) @property - def samples(self) -> list[str]: + def samples(self) -> List[str]: """ Ordered list of sample names in the model. """ From 789e91d8e1325b270aed8cf8039478d212b3b20c Mon Sep 17 00:00:00 2001 From: maly Date: Fri, 21 Feb 2025 12:25:55 +0000 Subject: [PATCH 03/19] merging samples in stdev, working but not fully tested --- src/cabinetry/model_utils.py | 108 +++++++++++++++++++++++++++++++---- tests/test_model_utils.py | 77 +++++++++++++++++++++---- 2 files changed, 162 insertions(+), 23 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index c1f9cf89..99f96096 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -25,14 +25,24 @@ def __init__( samples_merge_map: Optional[Dict[str, List[str]]] = None, ): super().__init__(model.spec) - # self.channel_nbins = model.config.channel_nbins - # self.channels = model.config.channels - # self.channel_slices = model.config.channel_slices - # self.pyhf_model = model # Can this get too big and become a memory issue ? - # self.samples = model.config.samples + self._original_config = model.config + # this is going to break if more config kwargs added in pyhf _ModelConfig + self.modifier_settings = self._original_config.modifier_settings + self.merged_samples_indices = None if samples_merge_map is not None: self._update_samples(samples_merge_map) + def __getattr__(self, name): + """ + Dynamically forward attribute access to the original config if not found. 
+ """ + try: + return getattr(self._original_config, name) + except AttributeError as e: + raise AttributeError( + f"'LightConfig' object has no attribute '{name}'" + ) from e + @property def samples(self) -> List[str]: """ @@ -55,12 +65,13 @@ def _update_samples(self, samples_merge_map: Optional[Dict[str, List[str]]]): for merged_samples_list in samples_merge_map.values() for merged_sample in merged_samples_list } - merged_samples_idxs = [ + merged_samples_indices = [ idx for idx, sample in enumerate(self.samples) if sample in merged_samples_set ] - self.samples = np.delete(self.samples, merged_samples_idxs).tolist() + self.merged_samples_indices = merged_samples_indices + self.samples = np.delete(self.samples, merged_samples_indices).tolist() # Add new samples at the bottom of stack self.samples = np.insert( np.array(self.samples, dtype=object), @@ -76,7 +87,59 @@ def __init__( model: pyhf.pdf.Model, samples_merge_map: Optional[Dict[str, List[str]]] = None, ): + # super().__init__(model.spec) self.config = LightConfig(model, samples_merge_map) + self._pyhf_model = model + + def __getattr__(self, name): + """ + Dynamically forward attribute access to the original config if not found. + """ + try: + return getattr(self._pyhf_model, name) + except AttributeError as e: + raise AttributeError( + f"'LightModel' object has no attribute '{name}'" + ) from e + + @property + def pyhf_model(self): + return self._pyhf_model + + +def _merge_sample_yields( + model: LightModel, + old_yields: Union[List[List[List[float]]], List[List[float]]], + samples_merge_map: Dict[str, List[str]], + one_channel: Optional[bool] = False, +): + + def _sum_per_channel(i_ch=None): + if i_ch is not None: + old_yield = old_yields[i_ch] + else: + old_yield = old_yields + # for each channel, sum together the desired samples + summed_sample = np.sum( + np.asarray(old_yield)[model.config.merged_samples_indices], axis=0 + ) + # build set of remaining samples and remove the ones already summed + remaining_samples = np.delete( + old_yield, model.config.merged_samples_indices, axis=0 + ) + model_yields_one_channel = np.insert( + remaining_samples, np.arange(len(samples_merge_map)), summed_sample, axis=0 + ) + return model_yields_one_channel + + new_yields = [] + if not one_channel: + for i_ch in range(len(model.config.channels)): + new_yields.append(_sum_per_channel(i_ch=i_ch)) + else: + new_yields = _sum_per_channel() + + return np.array(new_yields) class ModelPrediction(NamedTuple): @@ -252,7 +315,7 @@ def prefit_uncertainties(model: pyhf.pdf.Model) -> np.ndarray: def _hashable_model_key( - model: pyhf.pdf.Model, + model: LightModel, # pyhf.pdf.Model, ) -> Tuple[str, Tuple[Tuple[str, str], ...]]: """Compute a hashable representation of the values that uniquely identify a model. @@ -283,10 +346,11 @@ def _hashable_model_key( def yield_stdev( - model: pyhf.pdf.Model, + model: LightModel, parameters: np.ndarray, uncertainty: np.ndarray, corr_mat: np.ndarray, + samples_merge_map: Optional[Dict[str, List[str]]] = None, ) -> Tuple[List[List[List[float]]], List[List[float]]]: """Calculates symmetrized model yield standard deviation per channel / sample / bin. @@ -298,7 +362,7 @@ def yield_stdev( of this function are cached to speed up subsequent calls with the same arguments. 
Args: - model (pyhf.pdf.Model): the model for which to calculate the standard deviations + model (LightModel): the model for which to calculate the standard deviations for all bins parameters (np.ndarray): central values of model parameters uncertainty (np.ndarray): uncertainty of model parameters @@ -351,9 +415,17 @@ def yield_stdev( up_comb = pyhf.tensorlib.to_numpy( model.main_model.expected_data(up_pars, return_by_sample=True) ) + # attach another entry with the total model prediction (sum over all samples) # indices: sample, bin up_comb = np.vstack((up_comb, np.sum(up_comb, axis=0))) + # print("up_comb:: ", up_comb) + if samples_merge_map is not None: + up_comb = _merge_sample_yields( + model, up_comb.tolist(), samples_merge_map, one_channel=True + ) + # print("up_comb post merge:: ", up_comb) + # turn into list of channels (keep all samples, select correct bins per channel) # indices: channel, sample, bin up_yields_per_channel = [ @@ -366,6 +438,8 @@ def yield_stdev( for chan_yields in up_yields_per_channel ] ) + # print("up_yields_channel_sum:: ", up_yields_channel_sum) + # reshape to drop bin axis, transpose to turn channel axis into new bin axis # (channel, sample, bin) -> (sample, bin) where "bin" becomes channel sums up_yields_channel_sum = up_yields_channel_sum.reshape( @@ -383,6 +457,13 @@ def yield_stdev( ) # add total prediction (sum over samples) down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) + # print("down_comb:: ", up_comb) + if samples_merge_map is not None: + down_comb = _merge_sample_yields( + model, down_comb.tolist(), samples_merge_map, one_channel=True + ) + # print("down_comb post merge:: ", down_comb) + # turn into list of channels down_yields_per_channel = [ down_comb[:, model.config.channel_slices[ch]] @@ -534,11 +615,16 @@ def prediction( for ch in model.config.channels ] + if samples_merge_map is not None: + model_yields = _merge_sample_yields( + light_model, model_yields, samples_merge_map + ) + # calculate the total standard deviation of the model prediction # indices: (channel, sample, bin) for per-bin uncertainties, # (channel, sample) for per-channel total_stdev_model_bins, total_stdev_model_channels = yield_stdev( - model, param_values, param_uncertainty, corr_mat + light_model, param_values, param_uncertainty, corr_mat ) return ModelPrediction( diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index bd498a5d..75090aed 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -171,26 +171,31 @@ def test_prefit_uncertainties( def test__hashable_model_key(example_spec): # key matches for two models built from the same spec - model_1 = pyhf.Workspace(example_spec).model() - model_2 = pyhf.Workspace(example_spec).model() + model_1 = model_utils.LightModel(pyhf.Workspace(example_spec).model()) + model_2 = model_utils.LightModel(pyhf.Workspace(example_spec).model()) assert model_utils._hashable_model_key(model_1) == model_utils._hashable_model_key( model_2 ) # key does not match if model has different interpcode - model_new_interpcode = pyhf.Workspace(example_spec).model( - modifier_settings={ - "normsys": {"interpcode": "code1"}, - "histosys": {"interpcode": "code0"}, - } + model_new_interpcode = model_utils.LightModel( + pyhf.Workspace(example_spec).model( + modifier_settings={ + "normsys": {"interpcode": "code1"}, + "histosys": {"interpcode": "code0"}, + } + ) ) + assert model_utils._hashable_model_key(model_1) != model_utils._hashable_model_key( model_new_interpcode ) -def test_yield_stdev(example_spec, 
example_spec_multibin): - model = pyhf.Workspace(example_spec).model() +def test_yield_stdev( + example_spec, example_spec_multibin, example_spec_with_multiple_background +): + model = model_utils.LightModel(pyhf.Workspace(example_spec).model()) parameters = np.asarray([0.95, 1.05]) uncertainty = np.asarray([0.1, 0.1]) corr_mat = np.asarray([[1.0, 0.2], [0.2, 1.0]]) @@ -212,7 +217,7 @@ def test_yield_stdev(example_spec, example_spec_multibin): assert np.allclose(total_stdev_chan, [[2.56754823, 2.56754823]]) # multiple channels, bins, staterrors - model = pyhf.Workspace(example_spec_multibin).model() + model = model_utils.LightModel(pyhf.Workspace(example_spec_multibin).model()) parameters = np.asarray([1.3, 0.9, 1.05, 0.95]) uncertainty = np.asarray([0.3, 0.1, 0.05, 0.1]) corr_mat = np.asarray( @@ -253,6 +258,54 @@ def test_yield_stdev(example_spec, example_spec_multibin): assert np.allclose(from_cache[0][i_reg], expected_stdev_bin[i_reg]) assert np.allclose(from_cache[1][i_reg], expected_stdev_chan[i_reg]) + # Multiple backgrounds with sample merging + # post-fit + model = model_utils.LightModel( + pyhf.Workspace(example_spec_with_multiple_background).model(), + samples_merge_map={"Total Background": ["Background", "Background 2"]}, + ) + """ + fit_results = FitResults( + np.asarray([1.1, 1.01, 1.2]), + np.asarray([0.1, 0.03, 0.07]), + ["Signal strength", "staterror_Signal-Region[0]", "Background 2 norm"], + np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), + 0.0, + ) + model_pred = model_utils.prediction( + model, + fit_results=fit_results, + samples_merge_map={"Total Background": ["Background", "Background 2"]}, + ) + """ + parameters = np.asarray([1.1, 1.01, 1.2]) + uncertainty = np.asarray([0.1, 0.03, 0.07]) + corr_mat = np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]) + + total_stdev_bin, total_stdev_chan = model_utils.yield_stdev( + model, + parameters, + uncertainty, + corr_mat, + samples_merge_map={"Total Background": ["Background", "Background 2"]}, + ) + # assert np.allclose(total_stdev_bin, [[[xx], [xx]]]) + # assert np.allclose(total_stdev_chan, [[xx, xx]]) + + # pre-fit + parameters = np.asarray([1.0, 1.0, 1.0]) + uncertainty = np.asarray([0.0, 0.0495665682, 0.01]) + diag_corr_mat = np.diagflat([1.0, 1.0, 1.0]) + total_stdev_bin, total_stdev_chan = model_utils.yield_stdev( + model, + parameters, + uncertainty, + diag_corr_mat, + samples_merge_map={"Total Background": ["Background", "Background 2"]}, + ) + # assert np.allclose(total_stdev_bin, [[[xx], [xx]]]) # the staterror + # assert np.allclose(total_stdev_chan, [[xx, xx]]) + @mock.patch( "cabinetry.model_utils.yield_stdev", @@ -310,7 +363,7 @@ def test_prediction( # call to stdev calculation assert mock_stdev.call_count == 1 - assert mock_stdev.call_args_list[0][0][0] == model + assert mock_stdev.call_args_list[0][0][0].pyhf_model == model assert np.allclose(mock_stdev.call_args_list[0][0][1], [1.0, 1.0, 1.0, 1.0]) assert np.allclose(mock_stdev.call_args_list[0][0][2], [0.0, 0.2, 0.4, 0.125]) assert np.allclose( @@ -342,7 +395,7 @@ def test_prediction( # call to stdev calculation with fit_results propagated assert mock_stdev.call_count == 2 - assert mock_stdev.call_args_list[1][0][0] == model + assert mock_stdev.call_args_list[1][0][0].pyhf_model == model assert np.allclose(mock_stdev.call_args_list[1][0][1], [1.1, 1.01]) assert np.allclose(mock_stdev.call_args_list[1][0][2], [0.1, 0.03]) assert np.allclose( From ba7517d0abd46956ed0d98076d8044bd85f39f64 Mon Sep 17 00:00:00 2001 From: maly 
Date: Fri, 21 Feb 2025 12:27:20 +0000 Subject: [PATCH 04/19] fix model_yields type after sample merge to be list --- src/cabinetry/model_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 99f96096..7f91d7f1 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -618,7 +618,7 @@ def prediction( if samples_merge_map is not None: model_yields = _merge_sample_yields( light_model, model_yields, samples_merge_map - ) + ).tolist() # calculate the total standard deviation of the model prediction # indices: (channel, sample, bin) for per-bin uncertainties, From e1b6190be9f5cb94ede1616f5762d288afd10cc5 Mon Sep 17 00:00:00 2001 From: maly Date: Fri, 21 Feb 2025 14:04:34 +0000 Subject: [PATCH 05/19] pass sample merging map to stdev from prediction call --- src/cabinetry/model_utils.py | 6 +++++- tests/test_model_utils.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 7f91d7f1..11226aa0 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -624,7 +624,11 @@ def prediction( # indices: (channel, sample, bin) for per-bin uncertainties, # (channel, sample) for per-channel total_stdev_model_bins, total_stdev_model_channels = yield_stdev( - light_model, param_values, param_uncertainty, corr_mat + light_model, + param_values, + param_uncertainty, + corr_mat, + samples_merge_map=samples_merge_map, ) return ModelPrediction( diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index 75090aed..f702c314 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -369,7 +369,7 @@ def test_prediction( assert np.allclose( mock_stdev.call_args_list[0][0][3], np.diagflat([1.0, 1.0, 1.0, 1.0]) ) - assert mock_stdev.call_args_list[0][1] == {} + assert mock_stdev.call_args_list[0][1] == {"samples_merge_map": None} # post-fit prediction, single-channel model model = pyhf.Workspace(example_spec).model() @@ -401,7 +401,7 @@ def test_prediction( assert np.allclose( mock_stdev.call_args_list[1][0][3], np.asarray([[1.0, 0.2], [0.2, 1.0]]) ) - assert mock_stdev.call_args_list[1][1] == {} + assert mock_stdev.call_args_list[1][1] == {"samples_merge_map": None} caplog.clear() From 5a8220737811f2b852b8f5d43a31ec7cd27c8828 Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 24 Feb 2025 11:13:11 +0000 Subject: [PATCH 06/19] do not inherit LightConfig from _ModelConfig, and cleanup comments --- src/cabinetry/model_utils.py | 11 ++--------- tests/test_model_utils.py | 14 -------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 11226aa0..8bf6ba7a 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -18,13 +18,12 @@ _YIELD_STDEV_CACHE: Dict[Any, Tuple[List[List[List[float]]], List[List[float]]]] = {} -class LightConfig(pyhf.pdf._ModelConfig): +class LightConfig: def __init__( self, model: pyhf.pdf.Model, samples_merge_map: Optional[Dict[str, List[str]]] = None, ): - super().__init__(model.spec) self._original_config = model.config # this is going to break if more config kwargs added in pyhf _ModelConfig self.modifier_settings = self._original_config.modifier_settings @@ -415,16 +414,13 @@ def yield_stdev( up_comb = pyhf.tensorlib.to_numpy( model.main_model.expected_data(up_pars, return_by_sample=True) ) - # attach another entry with the total model prediction (sum over all 
samples) # indices: sample, bin up_comb = np.vstack((up_comb, np.sum(up_comb, axis=0))) - # print("up_comb:: ", up_comb) if samples_merge_map is not None: up_comb = _merge_sample_yields( model, up_comb.tolist(), samples_merge_map, one_channel=True ) - # print("up_comb post merge:: ", up_comb) # turn into list of channels (keep all samples, select correct bins per channel) # indices: channel, sample, bin @@ -438,7 +434,6 @@ def yield_stdev( for chan_yields in up_yields_per_channel ] ) - # print("up_yields_channel_sum:: ", up_yields_channel_sum) # reshape to drop bin axis, transpose to turn channel axis into new bin axis # (channel, sample, bin) -> (sample, bin) where "bin" becomes channel sums @@ -449,7 +444,7 @@ def yield_stdev( # concatenate per-channel sums to up_comb (along bin axis) up_yields = np.concatenate((up_comb, up_yields_channel_sum), axis=1) # indices: variation, sample, bin - up_variations.append(up_yields.tolist()) + up_variations.append(up_yields) # model distribution per sample with this parameter varied down down_comb = pyhf.tensorlib.to_numpy( @@ -457,12 +452,10 @@ def yield_stdev( ) # add total prediction (sum over samples) down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) - # print("down_comb:: ", up_comb) if samples_merge_map is not None: down_comb = _merge_sample_yields( model, down_comb.tolist(), samples_merge_map, one_channel=True ) - # print("down_comb post merge:: ", down_comb) # turn into list of channels down_yields_per_channel = [ diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index f702c314..1d9ef315 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -264,20 +264,6 @@ def test_yield_stdev( pyhf.Workspace(example_spec_with_multiple_background).model(), samples_merge_map={"Total Background": ["Background", "Background 2"]}, ) - """ - fit_results = FitResults( - np.asarray([1.1, 1.01, 1.2]), - np.asarray([0.1, 0.03, 0.07]), - ["Signal strength", "staterror_Signal-Region[0]", "Background 2 norm"], - np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), - 0.0, - ) - model_pred = model_utils.prediction( - model, - fit_results=fit_results, - samples_merge_map={"Total Background": ["Background", "Background 2"]}, - ) - """ parameters = np.asarray([1.1, 1.01, 1.2]) uncertainty = np.asarray([0.1, 0.03, 0.07]) corr_mat = np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]) From 94dfdf7dc19b344a3ad2ca75e0fe7a6993a5b343 Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 24 Feb 2025 11:28:08 +0000 Subject: [PATCH 07/19] fix backend test --- tests/test_backends.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_backends.py b/tests/test_backends.py index 0b93b43e..84ed8023 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -54,7 +54,12 @@ def test_backend_integration(backend, reset_backend): param_uncertainty = cabinetry.model_utils.prefit_uncertainties(model) corr_mat = np.zeros(shape=(len(param_values), len(param_values))) np.fill_diagonal(corr_mat, 1.0) - cabinetry.model_utils.yield_stdev(model, param_values, param_uncertainty, corr_mat) + cabinetry.model_utils.yield_stdev( + cabinetry.model_utils.LightModel(model), + param_values, + param_uncertainty, + corr_mat, + ) def test_backend_reset(): From 071bad6c37d9e77a856545c11fecaba7693d8fe0 Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 24 Feb 2025 11:54:15 +0000 Subject: [PATCH 08/19] change model in ModelPrediction object and change typing minimally in model_utils functions --- 
src/cabinetry/model_utils.py | 24 ++++++++++++++---------- tests/test_model_utils.py | 30 ++++++++++++++++++------------ 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 8bf6ba7a..1b6b02ea 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -145,8 +145,8 @@ class ModelPrediction(NamedTuple): """Model prediction with yields and total uncertainties per bin and channel. Args: - model (LightModel): model (a light-weight version of pyhf.pdf.Model) to - which the prediction corresponds + model (LightModel or pyhf.pdf.Model): model (or a light-weight version of + pyhf.pdf.Model) to which the prediction corresponds model_yields (List[List[List[float]]]): yields per channel, sample and bin, indices: channel, sample, bin total_stdev_model_bins (List[List[List[float]]]): total yield uncertainty per @@ -157,7 +157,7 @@ class ModelPrediction(NamedTuple): label (str): label for the prediction, e.g. "pre-fit" or "post-fit" """ - model: pyhf.pdf.Model + model: Union[LightModel, pyhf.pdf.Model] model_yields: List[List[List[float]]] total_stdev_model_bins: List[List[List[float]]] total_stdev_model_channels: List[List[float]] @@ -625,7 +625,11 @@ def prediction( ) return ModelPrediction( - model, model_yields, total_stdev_model_bins, total_stdev_model_channels, label + light_model, + model_yields, + total_stdev_model_bins, + total_stdev_model_channels, + label, ) @@ -707,11 +711,11 @@ def _poi_index( return poi_index -def _strip_auxdata(model: pyhf.pdf.Model, data: List[float]) -> List[float]: +def _strip_auxdata(model: LightModel, data: List[float]) -> List[float]: """Always returns observed yields, no matter whether data includes auxdata. Args: - model (pyhf.pdf.Model): model to which data corresponds to + model (LightModel): model to which the data corresponds data (List[float]): data, either including auxdata which is then stripped off or only observed yields @@ -725,11 +729,11 @@ def _strip_auxdata(model: pyhf.pdf.Model, data: List[float]) -> List[float]: return data -def _data_per_channel(model: pyhf.pdf.Model, data: List[float]) -> List[List[float]]: +def _data_per_channel(model: LightModel, data: List[float]) -> List[List[float]]: """Returns data split per channel, and strips off auxiliary data if included. Args: - model (pyhf.pdf.Model): model to which data corresponds to + model (LightModel): model to which the data corresponds data (List[float]): data (not split by channel), can either include auxdata which is then stripped off, or only observed yields @@ -747,12 +751,12 @@ def _data_per_channel(model: pyhf.pdf.Model, data: List[float]) -> List[List[float]]: def _filter_channels( - model: pyhf.pdf.Model, channels: Optional[Union[str, List[str]]] + model: LightModel, channels: Optional[Union[str, List[str]]] ) -> List[str]: """Returns a list of channels in a model after applying filtering.
Args: - model (pyhf.pdf.Model): model from which to extract channels + model (LightModel): model from which to extract channels channels (Optional[Union[str, List[str]]]): name of channel or list of channels to filter, only including those channels provided via this argument in the return of the function diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index 1d9ef315..8c5e74fb 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -331,7 +331,7 @@ def test_prediction( # pre-fit prediction, multi-channel model model_pred = model_utils.prediction(model) - assert model_pred.model == model + assert model_pred.model.pyhf_model == model # yields from pyhf expected_data call, per-bin / per-channel uncertainty from mock assert model_pred.model_yields == [[[25.0, 5.0]], [[8.0]]] assert model_pred.total_stdev_model_bins == [ @@ -367,7 +367,7 @@ def test_prediction( 0.0, ) model_pred = model_utils.prediction(model, fit_results=fit_results) - assert model_pred.model == model + assert model_pred.model.pyhf_model == model assert np.allclose(model_pred.model_yields, [[[57.54980000]]]) # new par value assert model_pred.total_stdev_model_bins == [[[0.3], [0.3]]] # from mock assert model_pred.total_stdev_model_channels == [[0.3, 0.3]] # from mock @@ -476,40 +476,46 @@ def test__poi_index(example_spec, caplog): def test__strip_auxdata(example_spec): model = pyhf.Workspace(example_spec).model() + light_model = model_utils.LightModel(model) data_with_aux = list(model.expected_data([1.0, 1.0], include_auxdata=True)) data_without_aux = list(model.expected_data([1.0, 1.0], include_auxdata=False)) - assert model_utils._strip_auxdata(model, data_with_aux) == [51.8] - assert model_utils._strip_auxdata(model, data_without_aux) == [51.8] + assert model_utils._strip_auxdata(light_model, data_with_aux) == [51.8] + assert model_utils._strip_auxdata(light_model, data_without_aux) == [51.8] @mock.patch("cabinetry.model_utils._strip_auxdata", return_value=[25.0, 5.0, 8.0]) def test__data_per_channel(mock_aux, example_spec_multibin): model = pyhf.Workspace(example_spec_multibin).model() + light_model = model_utils.LightModel(model) data = [25.0, 5.0, 8.0, 1.0, 1.0, 1.0] - data_per_ch = model_utils._data_per_channel(model, [25.0, 5.0, 8.0, 1.0, 1.0, 1.0]) + data_per_ch = model_utils._data_per_channel( + light_model, [25.0, 5.0, 8.0, 1.0, 1.0, 1.0] + ) assert data_per_ch == [[25.0, 5.0], [8.0]] # auxdata stripped and split by channel # auxdata and channel index call - assert mock_aux.call_args_list == [((model, data), {})] + assert mock_aux.call_args_list == [((light_model, data), {})] def test__filter_channels(example_spec_multibin, caplog): caplog.set_level(logging.DEBUG) model = pyhf.Workspace(example_spec_multibin).model() - - assert model_utils._filter_channels(model, None) == ["region_1", "region_2"] - assert model_utils._filter_channels(model, "region_1") == ["region_1"] - assert model_utils._filter_channels(model, ["region_1", "region_2"]) == [ + light_model = model_utils.LightModel(model) + assert model_utils._filter_channels(light_model, None) == ["region_1", "region_2"] + assert model_utils._filter_channels(light_model, "region_1") == ["region_1"] + assert model_utils._filter_channels(light_model, ["region_1", "region_2"]) == [ "region_1", "region_2", ] - assert model_utils._filter_channels(model, ["region_1", "abc"]) == ["region_1"] + assert model_utils._filter_channels(light_model, ["region_1", "abc"]) == [ + "region_1" + ] caplog.clear() # no matching channels - assert 
model_utils._filter_channels(model, "abc") == [] + assert model_utils._filter_channels(light_model, "abc") == [] assert ( "channel(s) ['abc'] not found in model, available channel(s): " "['region_1', 'region_2']" in [rec.message for rec in caplog.records] From 04269373299741d63bf29d1be1ac2357afd7e5cb Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 24 Feb 2025 15:25:39 +0000 Subject: [PATCH 09/19] predictions test --- tests/test_model_utils.py | 74 +++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index 8c5e74fb..7563d7f5 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -302,19 +302,28 @@ def test_yield_stdev( ), ([[[0.3], [0.3]]], [[0.3, 0.3]]), # post-fit, single-channel nominal ([[[0.3], [0.3]]], [[0.3, 0.3]]), # post-fit single-channel, custom - ([[[0.3], [0.3]]], [[0.3, 0.3]]), # post-fit single-channel, merged (values?) + ( + [[[12.5], [4.4], [16.7]]], + [[12.5, 4.4, 16.7]], + ), # pre-fit single-channel, merged samples model + ( + [[[12.5], [4.4], [16.7]]], + [[12.5, 4.4, 16.7]], + ), # post-fit single-channel, merged samples model ], ) @mock.patch( "cabinetry.model_utils.prefit_uncertainties", - return_value=np.asarray([0.0, 0.2, 0.4, 0.125]), + side_effect=[ + np.asarray([0.0, 0.2, 0.4, 0.125]), + np.asarray([0.0, 0.0, 0.039]), # pre-fit unc, merged samples model + ], ) @mock.patch( "cabinetry.model_utils.asimov_parameters", side_effect=[ np.asarray([1.0, 1.0, 1.0, 1.0]), - np.asarray([1.0, 1.0]), - np.asarray([1.0, 1.0]), + np.asarray([1.0, 1.0, 1.0]), ], ) def test_prediction( @@ -344,8 +353,8 @@ def test_prediction( assert model_pred.label == "pre-fit" # Asimov parameter calculation and pre-fit uncertainties - assert mock_asimov.call_args_list == [((model,), {})] - assert mock_unc.call_args_list == [((model,), {})] + assert mock_asimov.call_args_list[0] == ((model,), {}) + assert mock_unc.call_args_list[0] == ((model,), {}) # call to stdev calculation assert mock_stdev.call_count == 1 @@ -407,12 +416,38 @@ def test_prediction( caplog.clear() # Test with merging samples + # print("\n Testing merged samples \n") model = pyhf.Workspace(example_spec_with_multiple_background).model() + # pre-fit prediction, merged samples + model_pred = model_utils.prediction( + model, samples_merge_map={"Total Background": ["Background", "Background 2"]} + ) + assert model_pred.model.pyhf_model == model + # yields from pyhf expected_data call, per-bin / per-channel uncertainty from mock + assert model_pred.model_yields == [[[170.0], [50.0]]] + assert model_pred.total_stdev_model_bins == [[[12.5], [4.4], [16.7]]] + assert model_pred.total_stdev_model_channels == [[12.5, 4.4, 16.7]] + assert model_pred.label == "pre-fit" + + # Asimov parameter calculation and pre-fit uncertainties + assert mock_asimov.call_args_list[1] == ((model,), {}) + assert mock_unc.call_args_list[1] == ((model,), {}) + + assert mock_asimov.call_count == 2 # one new call + assert mock_unc.call_count == 2 # one new call + + # call to stdev calculation + assert mock_stdev.call_count == 4 + assert mock_stdev.call_args_list[3][1] == { + "samples_merge_map": {"Total Background": ["Background", "Background 2"]} + } + + # post-fit fit_results = FitResults( - np.asarray([1.1, 1.01, 1.2]), - np.asarray([0.1, 0.03, 0.07]), - ["Signal strength", "staterror_Signal-Region[0]", "Background 2 norm"], - np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), + np.asarray([1.2, 1.1, 1.01]), + np.asarray([0.1, 
0.07, 0.03]), + ["Background 2 norm", "Signal strength", "staterror_Signal-Region[0]"], + np.asarray([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]), 0.0, ) model_pred = model_utils.prediction( @@ -420,6 +455,25 @@ def test_prediction( fit_results=fit_results, samples_merge_map={"Total Background": ["Background", "Background 2"]}, ) + assert model_pred.model.pyhf_model == model + assert np.allclose(model_pred.model_yields, [[[175.74], [55.55]]]) # new par value + assert model_pred.total_stdev_model_bins == [[[12.5], [4.4], [16.7]]] # from mock + assert model_pred.total_stdev_model_channels == [[12.5, 4.4, 16.7]] # from mock + assert model_pred.label == "post-fit" + assert "parameter names in fit results and model do not match" not in [ + rec.message for rec in caplog.records + ] + + # Asimov parameter calculation and pre-fit uncertainties + assert mock_asimov.call_count == 2 # no new call + assert mock_unc.call_count == 2 # no new call + + # call to stdev calculation + assert mock_stdev.call_count == 5 + assert mock_stdev.call_args_list[4][1] == { + "samples_merge_map": {"Total Background": ["Background", "Background 2"]} + } + caplog.clear() From 23b86976bb2f79919955c3efa965b645ff0052d2 Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 24 Feb 2025 16:29:11 +0000 Subject: [PATCH 10/19] add tests for stdev function --- tests/test_model_utils.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index 7563d7f5..b373af7e 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -264,6 +264,7 @@ def test_yield_stdev( pyhf.Workspace(example_spec_with_multiple_background).model(), samples_merge_map={"Total Background": ["Background", "Background 2"]}, ) + parameters = np.asarray([1.1, 1.01, 1.2]) uncertainty = np.asarray([0.1, 0.03, 0.07]) corr_mat = np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]) @@ -275,12 +276,17 @@ def test_yield_stdev( corr_mat, samples_merge_map={"Total Background": ["Background", "Background 2"]}, ) - # assert np.allclose(total_stdev_bin, [[[xx], [xx]]]) - # assert np.allclose(total_stdev_chan, [[xx, xx]]) + assert np.allclose( + total_stdev_bin, + [[[12.510027977586642], [4.421993328805458], [16.66150128289767]]], + ) + assert np.allclose( + total_stdev_chan, [[12.510027977586642, 4.421993328805458, 16.66150128289767]] + ) # pre-fit parameters = np.asarray([1.0, 1.0, 1.0]) - uncertainty = np.asarray([0.0, 0.0495665682, 0.01]) + uncertainty = np.asarray([0.1, 0.03, 0.07]) diag_corr_mat = np.diagflat([1.0, 1.0, 1.0]) total_stdev_bin, total_stdev_chan = model_utils.yield_stdev( model, @@ -289,8 +295,13 @@ def test_yield_stdev( diag_corr_mat, samples_merge_map={"Total Background": ["Background", "Background 2"]}, ) - # assert np.allclose(total_stdev_bin, [[[xx], [xx]]]) # the staterror - # assert np.allclose(total_stdev_chan, [[xx, xx]]) + assert np.allclose( + total_stdev_bin, + [[[12.066896867049131], [3.8078865529319543], [15.601602481796547]]], + ) + assert np.allclose( + total_stdev_chan, [[12.066896867049131, 3.8078865529319543, 15.601602481796547]] + ) @mock.patch( @@ -415,8 +426,7 @@ def test_prediction( assert model_pred.label == "abc" caplog.clear() - # Test with merging samples - # print("\n Testing merged samples \n") + # Multiple backgrounds with sample merging model = pyhf.Workspace(example_spec_with_multiple_background).model() # pre-fit prediction, merged samples model_pred = model_utils.prediction( @@ -447,7 +457,7 @@ def 
test_prediction( np.asarray([1.2, 1.1, 1.01]), np.asarray([0.1, 0.07, 0.03]), ["Background 2 norm", "Signal strength", "staterror_Signal-Region[0]"], - np.asarray([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]), + np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), 0.0, ) model_pred = model_utils.prediction( From c312f2a2ea405095bee6f664ab5e8d86b16eb6b9 Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 24 Feb 2025 18:16:08 +0000 Subject: [PATCH 11/19] write LightModel test and add set of samples as a key to cache --- src/cabinetry/model_utils.py | 4 +-- tests/test_model_utils.py | 66 +++++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 1b6b02ea..9270a05f 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -383,6 +383,7 @@ def yield_stdev( tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes(), + ",".join(model.config.samples), ), None, ) @@ -480,7 +481,6 @@ def yield_stdev( # convert to numpy arrays for further processing up_variations_np = np.asarray(up_variations) down_variations_np = np.asarray(down_variations) - # calculate symmetric uncertainties for all components # indices: variation, channel (last entries sums), sample (last entry sum), bin sym_uncs = (up_variations_np - down_variations_np) / 2 @@ -544,6 +544,7 @@ def yield_stdev( tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes(), + ",".join(model.config.samples), ): (total_stdev_per_bin, total_stdev_per_channel) } ) @@ -578,7 +579,6 @@ def prediction( ModelPrediction: model, yields and uncertainties per channel, sample, bin """ light_model = LightModel(model, samples_merge_map) - log.debug(light_model.config.samples) if fit_results is not None: if fit_results.labels != model.config.par_names: log.warning("parameter names in fit results and model do not match") diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index b373af7e..7c3b7de8 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -10,6 +10,69 @@ from cabinetry.fit.results_containers import FitResults +def test_LightModel(example_spec_with_multiple_background): + # Test without merging samples + model = pyhf.Workspace(example_spec_with_multiple_background).model() + fit_results = FitResults( + np.asarray([1.0, 1.0, 1.0]), + np.asarray([0.1, 0.03, 0.07]), + ["Background 2 norm", "Signal strength", "staterror_Signal-Region[0]"], + np.asarray([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]), + 0.0, + ) + model_pred = model_utils.prediction( + model, fit_results=fit_results, samples_merge_map=None + ) + + assert model_pred.model.config.samples == ["Background", "Background 2", "Signal"] + assert model_pred.model_yields == [[[150.0], [20.0], [50.0]]] + assert model_pred.total_stdev_model_bins == [ + [[10.5], [2.441311123146742], [3.8078865529319543], [15.601602481796547]] + ] + assert model_pred.total_stdev_model_channels == [ + [10.5, 2.441311123146742, 3.8078865529319543, 15.601602481796547] + ] + assert model_pred.model.pyhf_model == model + assert model_pred.model.config.channels == model.config.channels + assert model_pred.model.config.channel_nbins == model.config.channel_nbins + assert model_pred.model.config.channel_slices == model.config.channel_slices + assert model_pred.model.config.modifier_settings == model.config.modifier_settings + assert model_pred.model.spec == model.spec + + # Test with merging samples + model_pred = model_utils.prediction( + 
model, + fit_results=fit_results, + samples_merge_map={"Total Background": ["Background", "Background 2"]}, + ) + + assert model_pred.model.config.samples == ["Total Background", "Signal"] + assert model_pred.model_yields == [[[170.0], [50.0]]] + assert model_pred.total_stdev_model_bins == [ + [[12.066896867049131], [3.8078865529319543], [15.601602481796547]] + ] + assert model_pred.total_stdev_model_channels == [ + [12.066896867049131, 3.8078865529319543, 15.601602481796547] + ] + assert model_pred.model.pyhf_model == model + assert model_pred.model.config.channels == model.config.channels + assert model_pred.model.config.channel_nbins == model.config.channel_nbins + assert model_pred.model.config.channel_slices == model.config.channel_slices + assert model_pred.model.config.modifier_settings == model.config.modifier_settings + assert model_pred.model.spec == model.spec + + with pytest.raises( + AttributeError, + match="'LightConfig' object has no attribute 'NonExistent'", + ): + model_pred.model.config.NonExistent + with pytest.raises( + AttributeError, + match="'LightModel' object has no attribute 'NonExistent'", + ): + model_pred.model.NonExistent + + def test_ModelPrediction(example_spec): model = pyhf.Workspace(example_spec).model() model_yields = [[[10.0]]] @@ -253,6 +316,7 @@ def test_yield_stdev( tuple(parameters), tuple(uncertainty), corr_mat.tobytes(), + ",".join(model.config.samples), ] for i_reg in range(2): assert np.allclose(from_cache[0][i_reg], expected_stdev_bin[i_reg]) @@ -455,7 +519,7 @@ def test_prediction( # post-fit fit_results = FitResults( np.asarray([1.2, 1.1, 1.01]), - np.asarray([0.1, 0.07, 0.03]), + np.asarray([0.1, 0.03, 0.07]), ["Background 2 norm", "Signal strength", "staterror_Signal-Region[0]"], np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), 0.0, From fd02e38bddd0a9de797cd473340357b3012157dd Mon Sep 17 00:00:00 2001 From: maly Date: Tue, 25 Feb 2025 14:34:27 +0000 Subject: [PATCH 12/19] simplify the LightModel and LightConfig to only hold minimal information --- src/cabinetry/model_utils.py | 96 +++++++++++++++++++----------------- tests/test_model_utils.py | 60 +++++++++++----------- 2 files changed, 80 insertions(+), 76 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 9270a05f..064a48bf 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -24,23 +24,30 @@ def __init__( model: pyhf.pdf.Model, samples_merge_map: Optional[Dict[str, List[str]]] = None, ): - self._original_config = model.config + self.samples = model.config.samples + self.channels = model.config.channels + self.channel_slices = model.config.channel_slices + self.channel_nbins = model.config.channel_nbins + self.npars = model.config.npars + # self._original_config = model.config # this is going to break if more config kwargs added in pyhf _ModelConfig - self.modifier_settings = self._original_config.modifier_settings + self.modifier_settings = model.config.modifier_settings + self.samples_merge_map = samples_merge_map self.merged_samples_indices = None if samples_merge_map is not None: self._update_samples(samples_merge_map) - def __getattr__(self, name): - """ - Dynamically forward attribute access to the original config if not found. 
- """ - try: - return getattr(self._original_config, name) - except AttributeError as e: - raise AttributeError( - f"'LightConfig' object has no attribute '{name}'" - ) from e + # def __getattr__(self, name): + # """ + # Dynamically forward attribute access to the original config if not found. + # """ + # try: + # print("name from config: ", name) + # return getattr(self._original_config, name) + # except AttributeError as e: + # raise AttributeError( + # f"'LightConfig' object has no attribute '{name}'" + # ) from e @property def samples(self) -> List[str]: @@ -86,30 +93,25 @@ def __init__( model: pyhf.pdf.Model, samples_merge_map: Optional[Dict[str, List[str]]] = None, ): - # super().__init__(model.spec) self.config = LightConfig(model, samples_merge_map) - self._pyhf_model = model - - def __getattr__(self, name): - """ - Dynamically forward attribute access to the original config if not found. - """ - try: - return getattr(self._pyhf_model, name) - except AttributeError as e: - raise AttributeError( - f"'LightModel' object has no attribute '{name}'" - ) from e + self.spec = model.spec - @property - def pyhf_model(self): - return self._pyhf_model + # def __getattr__(self, name): + # """ + # Dynamically forward attribute access to the original model if not found. + # """ + # try: + # print("name from model: ", name) + # return getattr(self._pyhf_model, name) + # except AttributeError as e: + # raise AttributeError( + # f"'LightModel' object has no attribute '{name}'" + # ) from e def _merge_sample_yields( model: LightModel, old_yields: Union[List[List[List[float]]], List[List[float]]], - samples_merge_map: Dict[str, List[str]], one_channel: Optional[bool] = False, ): @@ -127,7 +129,10 @@ def _sum_per_channel(i_ch=None): old_yield, model.config.merged_samples_indices, axis=0 ) model_yields_one_channel = np.insert( - remaining_samples, np.arange(len(samples_merge_map)), summed_sample, axis=0 + remaining_samples, + np.arange(len(model.config.samples_merge_map)), + summed_sample, + axis=0, ) return model_yields_one_channel @@ -314,7 +319,7 @@ def prefit_uncertainties(model: pyhf.pdf.Model) -> np.ndarray: def _hashable_model_key( - model: LightModel, # pyhf.pdf.Model, + model: pyhf.pdf.Model, ) -> Tuple[str, Tuple[Tuple[str, str], ...]]: """Compute a hashable representation of the values that uniquely identify a model. @@ -345,11 +350,11 @@ def _hashable_model_key( def yield_stdev( - model: LightModel, + model: pyhf.pdf.Model, parameters: np.ndarray, uncertainty: np.ndarray, corr_mat: np.ndarray, - samples_merge_map: Optional[Dict[str, List[str]]] = None, + light_model: Optional[LightModel] = None, ) -> Tuple[List[List[List[float]]], List[List[float]]]: """Calculates symmetrized model yield standard deviation per channel / sample / bin. 
@@ -377,13 +382,18 @@ def yield_stdev( over all samples) """ # check whether results are already stored in cache + samples_string = ( + ",".join(light_model.config.samples) + if light_model is not None + else ",".join(model.config.samples) + ) cached_results = _YIELD_STDEV_CACHE.get( ( _hashable_model_key(model), tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes(), - ",".join(model.config.samples), + samples_string, ), None, ) @@ -418,9 +428,9 @@ def yield_stdev( # attach another entry with the total model prediction (sum over all samples) # indices: sample, bin up_comb = np.vstack((up_comb, np.sum(up_comb, axis=0))) - if samples_merge_map is not None: + if light_model is not None: up_comb = _merge_sample_yields( - model, up_comb.tolist(), samples_merge_map, one_channel=True + light_model, up_comb.tolist(), one_channel=True ) # turn into list of channels (keep all samples, select correct bins per channel) @@ -453,9 +463,9 @@ def yield_stdev( ) # add total prediction (sum over samples) down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) - if samples_merge_map is not None: + if light_model is not None: down_comb = _merge_sample_yields( - model, down_comb.tolist(), samples_merge_map, one_channel=True + light_model, down_comb.tolist(), one_channel=True ) # turn into list of channels @@ -544,7 +554,7 @@ def yield_stdev( tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes(), - ",".join(model.config.samples), + samples_string, ): (total_stdev_per_bin, total_stdev_per_channel) } ) @@ -609,19 +619,17 @@ def prediction( ] if samples_merge_map is not None: - model_yields = _merge_sample_yields( - light_model, model_yields, samples_merge_map - ).tolist() + model_yields = _merge_sample_yields(light_model, model_yields).tolist() # calculate the total standard deviation of the model prediction # indices: (channel, sample, bin) for per-bin uncertainties, # (channel, sample) for per-channel total_stdev_model_bins, total_stdev_model_channels = yield_stdev( - light_model, + model, param_values, param_uncertainty, corr_mat, - samples_merge_map=samples_merge_map, + light_model=light_model if samples_merge_map is not None else None, ) return ModelPrediction( diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index 7c3b7de8..7a5d3758 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -32,7 +32,6 @@ def test_LightModel(example_spec_with_multiple_background): assert model_pred.total_stdev_model_channels == [ [10.5, 2.441311123146742, 3.8078865529319543, 15.601602481796547] ] - assert model_pred.model.pyhf_model == model assert model_pred.model.config.channels == model.config.channels assert model_pred.model.config.channel_nbins == model.config.channel_nbins assert model_pred.model.config.channel_slices == model.config.channel_slices @@ -54,7 +53,6 @@ def test_LightModel(example_spec_with_multiple_background): assert model_pred.total_stdev_model_channels == [ [12.066896867049131, 3.8078865529319543, 15.601602481796547] ] - assert model_pred.model.pyhf_model == model assert model_pred.model.config.channels == model.config.channels assert model_pred.model.config.channel_nbins == model.config.channel_nbins assert model_pred.model.config.channel_slices == model.config.channel_slices @@ -234,20 +232,18 @@ def test_prefit_uncertainties( def test__hashable_model_key(example_spec): # key matches for two models built from the same spec - model_1 = model_utils.LightModel(pyhf.Workspace(example_spec).model()) - model_2 = 
model_utils.LightModel(pyhf.Workspace(example_spec).model()) + model_1 = pyhf.Workspace(example_spec).model() + model_2 = pyhf.Workspace(example_spec).model() assert model_utils._hashable_model_key(model_1) == model_utils._hashable_model_key( model_2 ) # key does not match if model has different interpcode - model_new_interpcode = model_utils.LightModel( - pyhf.Workspace(example_spec).model( - modifier_settings={ - "normsys": {"interpcode": "code1"}, - "histosys": {"interpcode": "code0"}, - } - ) + model_new_interpcode = pyhf.Workspace(example_spec).model( + modifier_settings={ + "normsys": {"interpcode": "code1"}, + "histosys": {"interpcode": "code0"}, + } ) assert model_utils._hashable_model_key(model_1) != model_utils._hashable_model_key( @@ -258,7 +254,7 @@ def test__hashable_model_key(example_spec): def test_yield_stdev( example_spec, example_spec_multibin, example_spec_with_multiple_background ): - model = model_utils.LightModel(pyhf.Workspace(example_spec).model()) + model = pyhf.Workspace(example_spec).model() parameters = np.asarray([0.95, 1.05]) uncertainty = np.asarray([0.1, 0.1]) corr_mat = np.asarray([[1.0, 0.2], [0.2, 1.0]]) @@ -280,7 +276,7 @@ def test_yield_stdev( assert np.allclose(total_stdev_chan, [[2.56754823, 2.56754823]]) # multiple channels, bins, staterrors - model = model_utils.LightModel(pyhf.Workspace(example_spec_multibin).model()) + model = pyhf.Workspace(example_spec_multibin).model() parameters = np.asarray([1.3, 0.9, 1.05, 0.95]) uncertainty = np.asarray([0.3, 0.1, 0.05, 0.1]) corr_mat = np.asarray( @@ -324,11 +320,11 @@ def test_yield_stdev( # Multiple backgrounds with sample merging # post-fit - model = model_utils.LightModel( - pyhf.Workspace(example_spec_with_multiple_background).model(), + model = pyhf.Workspace(example_spec_with_multiple_background).model() + light_model = model_utils.LightModel( + model, samples_merge_map={"Total Background": ["Background", "Background 2"]}, ) - parameters = np.asarray([1.1, 1.01, 1.2]) uncertainty = np.asarray([0.1, 0.03, 0.07]) corr_mat = np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]) @@ -338,7 +334,7 @@ def test_yield_stdev( parameters, uncertainty, corr_mat, - samples_merge_map={"Total Background": ["Background", "Background 2"]}, + light_model=light_model, ) assert np.allclose( total_stdev_bin, @@ -357,7 +353,7 @@ def test_yield_stdev( parameters, uncertainty, diag_corr_mat, - samples_merge_map={"Total Background": ["Background", "Background 2"]}, + light_model=light_model, ) assert np.allclose( total_stdev_bin, @@ -415,7 +411,8 @@ def test_prediction( # pre-fit prediction, multi-channel model model_pred = model_utils.prediction(model) - assert model_pred.model.pyhf_model == model + assert model_pred.model.config.samples == model.config.samples + assert model_pred.model.spec == model.spec # yields from pyhf expected_data call, per-bin / per-channel uncertainty from mock assert model_pred.model_yields == [[[25.0, 5.0]], [[8.0]]] assert model_pred.total_stdev_model_bins == [ @@ -433,13 +430,13 @@ def test_prediction( # call to stdev calculation assert mock_stdev.call_count == 1 - assert mock_stdev.call_args_list[0][0][0].pyhf_model == model + assert mock_stdev.call_args_list[0][0][0].spec == model.spec assert np.allclose(mock_stdev.call_args_list[0][0][1], [1.0, 1.0, 1.0, 1.0]) assert np.allclose(mock_stdev.call_args_list[0][0][2], [0.0, 0.2, 0.4, 0.125]) assert np.allclose( mock_stdev.call_args_list[0][0][3], np.diagflat([1.0, 1.0, 1.0, 1.0]) ) - assert mock_stdev.call_args_list[0][1] == 
{"samples_merge_map": None} + assert mock_stdev.call_args_list[0][1] == {"light_model": None} # post-fit prediction, single-channel model model = pyhf.Workspace(example_spec).model() @@ -451,7 +448,8 @@ def test_prediction( 0.0, ) model_pred = model_utils.prediction(model, fit_results=fit_results) - assert model_pred.model.pyhf_model == model + assert model_pred.model.config.samples == model.config.samples + assert model_pred.model.spec == model.spec assert np.allclose(model_pred.model_yields, [[[57.54980000]]]) # new par value assert model_pred.total_stdev_model_bins == [[[0.3], [0.3]]] # from mock assert model_pred.total_stdev_model_channels == [[0.3, 0.3]] # from mock @@ -465,13 +463,13 @@ def test_prediction( # call to stdev calculation with fit_results propagated assert mock_stdev.call_count == 2 - assert mock_stdev.call_args_list[1][0][0].pyhf_model == model + assert mock_stdev.call_args_list[1][0][0].spec == model.spec assert np.allclose(mock_stdev.call_args_list[1][0][1], [1.1, 1.01]) assert np.allclose(mock_stdev.call_args_list[1][0][2], [0.1, 0.03]) assert np.allclose( mock_stdev.call_args_list[1][0][3], np.asarray([[1.0, 0.2], [0.2, 1.0]]) ) - assert mock_stdev.call_args_list[1][1] == {"samples_merge_map": None} + assert mock_stdev.call_args_list[1][1] == {"light_model": None} caplog.clear() @@ -496,7 +494,8 @@ def test_prediction( model_pred = model_utils.prediction( model, samples_merge_map={"Total Background": ["Background", "Background 2"]} ) - assert model_pred.model.pyhf_model == model + assert len(model_pred.model.config.samples) == len(model.config.samples) - 1 + assert model_pred.model.spec == model.spec # yields from pyhf expected_data call, per-bin / per-channel uncertainty from mock assert model_pred.model_yields == [[[170.0], [50.0]]] assert model_pred.total_stdev_model_bins == [[[12.5], [4.4], [16.7]]] @@ -512,9 +511,7 @@ def test_prediction( # call to stdev calculation assert mock_stdev.call_count == 4 - assert mock_stdev.call_args_list[3][1] == { - "samples_merge_map": {"Total Background": ["Background", "Background 2"]} - } + assert mock_stdev.call_args_list[3][1] == {"light_model": model_pred.model} # post-fit fit_results = FitResults( @@ -529,7 +526,8 @@ def test_prediction( fit_results=fit_results, samples_merge_map={"Total Background": ["Background", "Background 2"]}, ) - assert model_pred.model.pyhf_model == model + assert len(model_pred.model.config.samples) == len(model.config.samples) - 1 + assert model_pred.model.spec == model.spec assert np.allclose(model_pred.model_yields, [[[175.74], [55.55]]]) # new par value assert model_pred.total_stdev_model_bins == [[[12.5], [4.4], [16.7]]] # from mock assert model_pred.total_stdev_model_channels == [[12.5, 4.4, 16.7]] # from mock @@ -544,9 +542,7 @@ def test_prediction( # call to stdev calculation assert mock_stdev.call_count == 5 - assert mock_stdev.call_args_list[4][1] == { - "samples_merge_map": {"Total Background": ["Background", "Background 2"]} - } + assert mock_stdev.call_args_list[4][1] == {"light_model": model_pred.model} caplog.clear() From 2cb952d0bc27b910a553c8290eb22af071fcf836 Mon Sep 17 00:00:00 2001 From: maly Date: Tue, 25 Feb 2025 14:36:26 +0000 Subject: [PATCH 13/19] clean comments --- src/cabinetry/model_utils.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 064a48bf..e03eda94 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -37,18 +37,6 @@ def __init__( if 
samples_merge_map is not None:
             self._update_samples(samples_merge_map)
 
-    # def __getattr__(self, name):
-    #     """
-    #     Dynamically forward attribute access to the original config if not found.
-    #     """
-    #     try:
-    #         print("name from config: ", name)
-    #         return getattr(self._original_config, name)
-    #     except AttributeError as e:
-    #         raise AttributeError(
-    #             f"'LightConfig' object has no attribute '{name}'"
-    #         ) from e
-
     @property
     def samples(self) -> List[str]:
         """
@@ -96,18 +84,6 @@ def __init__(
         self.config = LightConfig(model, samples_merge_map)
         self.spec = model.spec
 
-    # def __getattr__(self, name):
-    #     """
-    #     Dynamically forward attribute access to the original model if not found.
-    #     """
-    #     try:
-    #         print("name from model: ", name)
-    #         return getattr(self._pyhf_model, name)
-    #     except AttributeError as e:
-    #         raise AttributeError(
-    #             f"'LightModel' object has no attribute '{name}'"
-    #         ) from e
-
 
 def _merge_sample_yields(
     model: LightModel,

From 548ec1f141cadb1636704123cceb6c9b3e817c6f Mon Sep 17 00:00:00 2001
From: maly
Date: Tue, 25 Feb 2025 14:44:12 +0000
Subject: [PATCH 14/19] missing update of backend test

---
 tests/test_backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_backends.py b/tests/test_backends.py
index 84ed8023..54bd29ac 100644
--- a/tests/test_backends.py
+++ b/tests/test_backends.py
@@ -55,7 +55,7 @@ def test_backend_integration(backend, reset_backend):
     corr_mat = np.zeros(shape=(len(param_values), len(param_values)))
     np.fill_diagonal(corr_mat, 1.0)
     cabinetry.model_utils.yield_stdev(
-        cabinetry.model_utils.LightModel(model),
+        model,
        param_values,
         param_uncertainty,
         corr_mat,

From f0f6ce5a529515d5dc68dc83bacbb427b5bb7008 Mon Sep 17 00:00:00 2001
From: maly
Date: Thu, 13 Mar 2025 11:59:08 +0000
Subject: [PATCH 15/19] mypy fixes (or rather, workarounds)

---
 src/cabinetry/model_utils.py | 66 ++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py
index ae50f0aa..d0158583 100644
--- a/src/cabinetry/model_utils.py
+++ b/src/cabinetry/model_utils.py
@@ -43,7 +43,7 @@ def __init__(
         # this is going to break if more config kwargs added in pyhf _ModelConfig
         self.modifier_settings = model.config.modifier_settings
         self.samples_merge_map = samples_merge_map
-        self.merged_samples_indices = None
+        self.merged_samples_indices: np.ndarray = np.zeros(0)
         if samples_merge_map is not None:
             self._update_samples(samples_merge_map)
 
@@ -55,13 +55,13 @@ def samples(self) -> List[str]:
         return self._samples
 
     @samples.setter
-    def samples(self, samples):
+    def samples(self, samples: List[str]) -> None:
         """
         Set the Ordered list of sample names in the model.
""" self._samples = samples - def _update_samples(self, samples_merge_map: Optional[Dict[str, List[str]]]): + def _update_samples(self, samples_merge_map: Dict[str, List[str]]) -> None: # Delete samples being merged from config # Flatten all merged samples into a set for O(1) lookups merged_samples_set = { @@ -74,15 +74,20 @@ def _update_samples(self, samples_merge_map: Optional[Dict[str, List[str]]]): for idx, sample in enumerate(self.samples) if sample in merged_samples_set ] - self.merged_samples_indices = merged_samples_indices - self.samples = np.delete(self.samples, merged_samples_indices).tolist() + self.merged_samples_indices = np.asarray(merged_samples_indices) + self.samples = cast( + List[str], np.delete(self.samples, merged_samples_indices).tolist() + ) # Add new samples at the bottom of stack - self.samples = np.insert( - np.array(self.samples, dtype=object), - np.arange(len(samples_merge_map)), - list(samples_merge_map.keys()), - axis=0, - ).tolist() + self.samples = cast( + List[str], + np.insert( + np.asarray(self.samples, dtype=object), + np.arange(len(samples_merge_map)), + list(samples_merge_map.keys()), + axis=0, + ).tolist(), + ) class LightModel: @@ -97,39 +102,43 @@ def __init__( def _merge_sample_yields( model: LightModel, - old_yields: Union[List[List[List[float]]], List[List[float]]], + old_yields: np.ndarray, one_channel: Optional[bool] = False, -): +) -> np.ndarray: - def _sum_per_channel(i_ch=None): + def _sum_per_channel(i_ch: Optional[int] = None) -> np.ndarray: + # explicit type casting because mypy worries that old_yield + # will be list(list(float)) or list(float) + # but this will never happen because of the if condition if i_ch is not None: old_yield = old_yields[i_ch] else: old_yield = old_yields # for each channel, sum together the desired samples - summed_sample = np.sum( - np.asarray(old_yield)[model.config.merged_samples_indices], axis=0 - ) + summed_sample = np.sum(old_yield[model.config.merged_samples_indices], axis=0) # build set of remaining samples and remove the ones already summed - remaining_samples = np.delete( + remaining_samples: np.ndarray = np.delete( old_yield, model.config.merged_samples_indices, axis=0 ) + # mypy not able to tell that map cannot be None-typed + # so we explicitly cast it to use dict attributes + samples_merge_map = cast(Dict[str, List[str]], model.config.samples_merge_map) model_yields_one_channel = np.insert( remaining_samples, - np.arange(len(model.config.samples_merge_map)), + np.arange(len(samples_merge_map.keys())), summed_sample, axis=0, ) return model_yields_one_channel - new_yields = [] + new_yields = np.zeros(0) if not one_channel: for i_ch in range(len(model.config.channels)): - new_yields.append(_sum_per_channel(i_ch=i_ch)) + np.append(new_yields, _sum_per_channel(i_ch=i_ch)) else: new_yields = _sum_per_channel() - return np.array(new_yields) + return new_yields class ModelPrediction(NamedTuple): @@ -415,9 +424,7 @@ def yield_stdev( # indices: sample, bin up_comb = np.vstack((up_comb, np.sum(up_comb, axis=0))) if light_model is not None: - up_comb = _merge_sample_yields( - light_model, up_comb.tolist(), one_channel=True - ) + up_comb = _merge_sample_yields(light_model, up_comb, one_channel=True) # turn into list of channels (keep all samples, select correct bins per channel) # indices: channel, sample, bin @@ -450,9 +457,7 @@ def yield_stdev( # add total prediction (sum over samples) down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) if light_model is not None: - down_comb = 
_merge_sample_yields( - light_model, down_comb.tolist(), one_channel=True - ) + down_comb = _merge_sample_yields(light_model, down_comb, one_channel=True) # turn into list of channels down_yields_per_channel = [ @@ -605,7 +610,10 @@ def prediction( ] if samples_merge_map is not None: - model_yields = _merge_sample_yields(light_model, model_yields).tolist() + model_yields = cast( + List[List[List[str]]], + _merge_sample_yields(light_model, np.asarray(model_yields)).tolist(), + ) # calculate the total standard deviation of the model prediction # indices: (channel, sample, bin) for per-bin uncertainties, From 1efdadf2a4d32938718db548911f4cbd31a0c7ab Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 17 Mar 2025 17:06:16 +0100 Subject: [PATCH 16/19] fix mutable class defaults and typing issues --- src/cabinetry/model_utils.py | 56 ++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index d0158583..f9556906 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -39,7 +39,6 @@ def __init__( self.channel_slices = model.config.channel_slices self.channel_nbins = model.config.channel_nbins self.npars = model.config.npars - # self._original_config = model.config # this is going to break if more config kwargs added in pyhf _ModelConfig self.modifier_settings = model.config.modifier_settings self.samples_merge_map = samples_merge_map @@ -102,43 +101,53 @@ def __init__( def _merge_sample_yields( model: LightModel, - old_yields: np.ndarray, + old_yields: Union[List[List[List[float]]], List[List[float]]], one_channel: Optional[bool] = False, ) -> np.ndarray: + samples_merge_map = model.config.samples_merge_map + def _sum_per_channel(i_ch: Optional[int] = None) -> np.ndarray: # explicit type casting because mypy worries that old_yield # will be list(list(float)) or list(float) # but this will never happen because of the if condition if i_ch is not None: - old_yield = old_yields[i_ch] + old_yield = cast(List[List[float]], old_yields[i_ch]) else: - old_yield = old_yields + old_yield = cast(List[List[float]], old_yields) # for each channel, sum together the desired samples - summed_sample = np.sum(old_yield[model.config.merged_samples_indices], axis=0) + summed_sample = np.sum( + np.asarray(old_yield)[model.config.merged_samples_indices], axis=0 + ) # build set of remaining samples and remove the ones already summed remaining_samples: np.ndarray = np.delete( old_yield, model.config.merged_samples_indices, axis=0 ) # mypy not able to tell that map cannot be None-typed - # so we explicitly cast it to use dict attributes - samples_merge_map = cast(Dict[str, List[str]], model.config.samples_merge_map) - model_yields_one_channel = np.insert( - remaining_samples, - np.arange(len(samples_merge_map.keys())), - summed_sample, - axis=0, - ) + # so we have to check + if samples_merge_map is not None: + model_yields_one_channel = np.insert( + remaining_samples, + np.arange(len(samples_merge_map.keys())), + summed_sample, + axis=0, + ) + else: + log.critical( + "Something has gone wrong in merging samples." + + " Report this to the dev team." 
+ ) return model_yields_one_channel - new_yields = np.zeros(0) + new_yields = [] if not one_channel: for i_ch in range(len(model.config.channels)): - np.append(new_yields, _sum_per_channel(i_ch=i_ch)) + new_yields.append(_sum_per_channel(i_ch=i_ch)) else: - new_yields = _sum_per_channel() + new_yields = [_sum_per_channel()] # wrap in list for consistent type - return new_yields + return_yields = np.asarray(new_yields[0]) if one_channel else np.asarray(new_yields) + return return_yields class ModelPrediction(NamedTuple): @@ -424,7 +433,9 @@ def yield_stdev( # indices: sample, bin up_comb = np.vstack((up_comb, np.sum(up_comb, axis=0))) if light_model is not None: - up_comb = _merge_sample_yields(light_model, up_comb, one_channel=True) + up_comb = _merge_sample_yields( + light_model, up_comb.tolist(), one_channel=True + ) # turn into list of channels (keep all samples, select correct bins per channel) # indices: channel, sample, bin @@ -457,7 +468,9 @@ def yield_stdev( # add total prediction (sum over samples) down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) if light_model is not None: - down_comb = _merge_sample_yields(light_model, down_comb, one_channel=True) + down_comb = _merge_sample_yields( + light_model, down_comb.tolist(), one_channel=True + ) # turn into list of channels down_yields_per_channel = [ @@ -611,8 +624,8 @@ def prediction( if samples_merge_map is not None: model_yields = cast( - List[List[List[str]]], - _merge_sample_yields(light_model, np.asarray(model_yields)).tolist(), + List[List[List[float]]], + _merge_sample_yields(light_model, model_yields).tolist(), ) # calculate the total standard deviation of the model prediction @@ -625,7 +638,6 @@ def prediction( corr_mat, light_model=light_model if samples_merge_map is not None else None, ) - return ModelPrediction( light_model, model_yields, From f97ef390aabf4706e5ed36aa7cf815e65e511fc2 Mon Sep 17 00:00:00 2001 From: maly Date: Mon, 17 Mar 2025 17:12:01 +0100 Subject: [PATCH 17/19] missing docstring --- src/cabinetry/model_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index f9556906..2c84f199 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -370,11 +370,13 @@ def yield_stdev( of this function are cached to speed up subsequent calls with the same arguments. 
Args: - model (LightModel): the model for which to calculate the standard deviations + model (pyhf.pdf.Model): the model for which to calculate the standard deviations for all bins parameters (np.ndarray): central values of model parameters uncertainty (np.ndarray): uncertainty of model parameters corr_mat (np.ndarray): correlation matrix + light_model (Optional[LightModel], optional): light-weight model to use for + merging samples, defaults to None Returns: Tuple[List[List[List[float]]], List[List[float]]]: From a48d8f27772bade4b55ba7d3d2cdaa47f68bd07c Mon Sep 17 00:00:00 2001 From: maly Date: Thu, 27 Mar 2025 17:21:01 +0100 Subject: [PATCH 18/19] pass sample merging maps around, not light model --- src/cabinetry/model_utils.py | 15 ++++++++------- tests/test_model_utils.py | 24 +++++++++++------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 2c84f199..fa83773e 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -358,7 +358,7 @@ def yield_stdev( parameters: np.ndarray, uncertainty: np.ndarray, corr_mat: np.ndarray, - light_model: Optional[LightModel] = None, + samples_merge_map: Optional[Dict[str, List[str]]] = None, ) -> Tuple[List[List[List[float]]], List[List[float]]]: """Calculates symmetrized model yield standard deviation per channel / sample / bin. @@ -375,8 +375,8 @@ def yield_stdev( parameters (np.ndarray): central values of model parameters uncertainty (np.ndarray): uncertainty of model parameters corr_mat (np.ndarray): correlation matrix - light_model (Optional[LightModel], optional): light-weight model to use for - merging samples, defaults to None + samples_merge_map (Optional[Dict[str, List[str]]], optional): a map specifying + how to merge samples, defaults to None Returns: Tuple[List[List[List[float]]], List[List[float]]]: @@ -387,10 +387,11 @@ def yield_stdev( the standard deviations per sample (the last sample corresponds to a sum over all samples) """ + light_model = LightModel(model, samples_merge_map) # check whether results are already stored in cache samples_string = ( ",".join(light_model.config.samples) - if light_model is not None + if samples_merge_map is not None else ",".join(model.config.samples) ) cached_results = _YIELD_STDEV_CACHE.get( @@ -434,7 +435,7 @@ def yield_stdev( # attach another entry with the total model prediction (sum over all samples) # indices: sample, bin up_comb = np.vstack((up_comb, np.sum(up_comb, axis=0))) - if light_model is not None: + if samples_merge_map is not None: up_comb = _merge_sample_yields( light_model, up_comb.tolist(), one_channel=True ) @@ -469,7 +470,7 @@ def yield_stdev( ) # add total prediction (sum over samples) down_comb = np.vstack((down_comb, np.sum(down_comb, axis=0))) - if light_model is not None: + if samples_merge_map is not None: down_comb = _merge_sample_yields( light_model, down_comb.tolist(), one_channel=True ) @@ -638,7 +639,7 @@ def prediction( param_values, param_uncertainty, corr_mat, - light_model=light_model if samples_merge_map is not None else None, + samples_merge_map=samples_merge_map, ) return ModelPrediction( light_model, diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index 243bd77d..0c4d5645 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -319,12 +319,9 @@ def test_yield_stdev( assert np.allclose(from_cache[1][i_reg], expected_stdev_chan[i_reg]) # Multiple backgrounds with sample merging + samples_merge_map = {"Total Background": 
["Background", "Background 2"]} # post-fit model = pyhf.Workspace(example_spec_with_multiple_background).model() - light_model = model_utils.LightModel( - model, - samples_merge_map={"Total Background": ["Background", "Background 2"]}, - ) parameters = np.asarray([1.1, 1.01, 1.2]) uncertainty = np.asarray([0.1, 0.03, 0.07]) corr_mat = np.asarray([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]) @@ -334,7 +331,7 @@ def test_yield_stdev( parameters, uncertainty, corr_mat, - light_model=light_model, + samples_merge_map=samples_merge_map, ) assert np.allclose( total_stdev_bin, @@ -353,7 +350,7 @@ def test_yield_stdev( parameters, uncertainty, diag_corr_mat, - light_model=light_model, + samples_merge_map=samples_merge_map, ) assert np.allclose( total_stdev_bin, @@ -436,7 +433,7 @@ def test_prediction( assert np.allclose( mock_stdev.call_args_list[0][0][3], np.diagflat([1.0, 1.0, 1.0, 1.0]) ) - assert mock_stdev.call_args_list[0][1] == {"light_model": None} + assert mock_stdev.call_args_list[0][1] == {"samples_merge_map": None} # post-fit prediction, single-channel model model = pyhf.Workspace(example_spec).model() @@ -469,7 +466,7 @@ def test_prediction( assert np.allclose( mock_stdev.call_args_list[1][0][3], np.asarray([[1.0, 0.2], [0.2, 1.0]]) ) - assert mock_stdev.call_args_list[1][1] == {"light_model": None} + assert mock_stdev.call_args_list[1][1] == {"samples_merge_map": None} caplog.clear() @@ -489,11 +486,10 @@ def test_prediction( caplog.clear() # Multiple backgrounds with sample merging + samples_merge_map = {"Total Background": ["Background", "Background 2"]} model = pyhf.Workspace(example_spec_with_multiple_background).model() # pre-fit prediction, merged samples - model_pred = model_utils.prediction( - model, samples_merge_map={"Total Background": ["Background", "Background 2"]} - ) + model_pred = model_utils.prediction(model, samples_merge_map=samples_merge_map) assert len(model_pred.model.config.samples) == len(model.config.samples) - 1 assert model_pred.model.spec == model.spec # yields from pyhf expected_data call, per-bin / per-channel uncertainty from mock @@ -511,7 +507,7 @@ def test_prediction( # call to stdev calculation assert mock_stdev.call_count == 4 - assert mock_stdev.call_args_list[3][1] == {"light_model": model_pred.model} + assert mock_stdev.call_args_list[3][1] == {"samples_merge_map": samples_merge_map} # post-fit fit_results = FitResults( @@ -542,7 +538,9 @@ def test_prediction( # call to stdev calculation assert mock_stdev.call_count == 5 - assert mock_stdev.call_args_list[4][1] == {"light_model": model_pred.model} + assert mock_stdev.call_args_list[4][1] == { + "samples_merge_map": {"Total Background": ["Background", "Background 2"]} + } caplog.clear() From 679ea24b36356ab952276cb0f4a1e7a9544dc2a3 Mon Sep 17 00:00:00 2001 From: maly Date: Thu, 27 Mar 2025 17:44:27 +0100 Subject: [PATCH 19/19] add test for sample merging of yields to improve coverage, and raise ValueError in bad setup --- src/cabinetry/model_utils.py | 29 +++++++++++++++-------------- tests/test_model_utils.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index fa83773e..9784732f 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -108,6 +108,13 @@ def _merge_sample_yields( samples_merge_map = model.config.samples_merge_map def _sum_per_channel(i_ch: Optional[int] = None) -> np.ndarray: + # mypy not able to tell that map cannot be None-typed + # so we 
have to check
+        if samples_merge_map is None:
+            raise ValueError(
+                "Something has gone wrong in merging samples."
+                + " Report this to the dev team."
+            )
         # explicit type casting because mypy worries that old_yield
         # will be list(list(float)) or list(float)
         # but this will never happen because of the if condition
@@ -123,20 +130,14 @@ def _sum_per_channel(i_ch: Optional[int] = None) -> np.ndarray:
         remaining_samples: np.ndarray = np.delete(
             old_yield, model.config.merged_samples_indices, axis=0
         )
-        # mypy not able to tell that map cannot be None-typed
-        # so we have to check
-        if samples_merge_map is not None:
-            model_yields_one_channel = np.insert(
-                remaining_samples,
-                np.arange(len(samples_merge_map.keys())),
-                summed_sample,
-                axis=0,
-            )
-        else:
-            log.critical(
-                "Something has gone wrong in merging samples."
-                + " Report this to the dev team."
-            )
+
+        model_yields_one_channel = np.insert(
+            remaining_samples,
+            np.arange(len(samples_merge_map.keys())),
+            summed_sample,
+            axis=0,
+        )
+
         return model_yields_one_channel
 
     new_yields = []
diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py
index 0c4d5645..cb3b2dbf 100644
--- a/tests/test_model_utils.py
+++ b/tests/test_model_utils.py
@@ -71,6 +71,34 @@ def test_LightModel(example_spec_with_multiple_background):
         model_pred.model.NonExistent
 
 
+def test__merge_sample_yields(example_spec_with_multiple_background):
+    model = pyhf.Workspace(example_spec_with_multiple_background).model()
+    samples_merge_map = {"Total Background": ["Background", "Background 2"]}
+    light_model = model_utils.LightModel(model, samples_merge_map)
+    # single channel: (sample, bin) yields
+    yields = [[10.0], [20.0], [30.0]]
+    merged_yields = model_utils._merge_sample_yields(
+        light_model, yields, one_channel=True
+    )
+    assert np.allclose(merged_yields, [[30.0], [30.0]])
+    # all channels: (channel, sample, bin) yields
+    yields = [[[10.0], [20.0], [30.0]]]
+    merged_yields = model_utils._merge_sample_yields(
+        light_model, yields, one_channel=False
+    )
+    assert np.allclose(merged_yields, [[[30.0], [30.0]]])
+
+    light_model = model_utils.LightModel(model, None)
+    with pytest.raises(
+        ValueError,
+        match="Something has gone wrong in merging samples."
+        + " Report this to the dev team.",
+    ):
+        merged_yields = model_utils._merge_sample_yields(
+            light_model, yields, one_channel=False
+        )
+
+
 def test_ModelPrediction(example_spec):
     model = pyhf.Workspace(example_spec).model()
     model_yields = [[[10.0]]]
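
A short usage sketch of what the series adds overall: sample merging is
requested through a single samples_merge_map argument, and prediction() then
returns a ModelPrediction carrying a LightModel with the merged sample list.
The spec, sample names, and yield numbers below are hypothetical (chosen to
mirror the example_spec_with_multiple_background fixture), and the sketch
assumes a cabinetry build with these patches applied:

    import pyhf

    from cabinetry import model_utils

    # one single-bin channel with a signal and two backgrounds
    spec = {
        "channels": [
            {
                "name": "SR",
                "samples": [
                    {
                        "name": "Signal",
                        "data": [5.0],
                        "modifiers": [
                            {"name": "mu", "type": "normfactor", "data": None}
                        ],
                    },
                    {"name": "Background", "data": [15.0], "modifiers": []},
                    {"name": "Background 2", "data": [2.0], "modifiers": []},
                ],
            }
        ],
        "measurements": [
            {"name": "fit", "config": {"parameters": [], "poi": "mu"}}
        ],
        "observations": [{"name": "SR", "data": [22.0]}],
        "version": "1.0.0",
    }
    model = pyhf.Workspace(spec).model()

    # merge the two backgrounds into a single entry in the prediction
    pred = model_utils.prediction(
        model,
        samples_merge_map={"Total Background": ["Background", "Background 2"]},
    )

    # the LightModel config lists merged samples first, i.e. at the bottom
    # of a plotted stack
    print(pred.model.config.samples)  # ['Total Background', 'Signal']
    print(pred.model_yields)  # indices: channel, sample, bin

The same map can be passed straight to model_utils.yield_stdev (patch 18),
which builds its own LightModel internally and keys the uncertainty cache on
the merged sample list.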