Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/hepstats/hypotests/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __eq__(self, other):
return values_equal.all() and name_equal

def __hash__(self):
return hash((self.name, self.values.tostring()))
return hash((self.name, self.values.data.tobytes()))

@property
def ndim(self):
Expand Down
10 changes: 6 additions & 4 deletions src/hepstats/utils/fit/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from __future__ import annotations

from .api_check import is_valid_pdf
from .diverse import get_value, set_values
from .diverse import get_value


def base_sampler(models, nevents):
Expand Down Expand Up @@ -58,9 +58,11 @@ def base_sample(samplers, ntoys, parameter=None, value=None, constraints=None):

params = {} if parameter is None or value is None else {parameter: value}
for i in range(ntoys):
with set_values(params):
for s in samplers:
s.resample() # do not pass parameters as arguments as it will fail in simultaneous fits
for s in samplers:
# Filter params to only those relevant to this sampler (for simultaneous fits)
sampler_param_names = set(s.params.keys())
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sampler_param_names set is computed inside the ntoys loop (line 60) but only depends on the sampler s, not the iteration variable i. This means the same computation is repeated unnecessarily for each toy. Consider computing this once per sampler before the loop to improve performance, especially when generating many toys.

Copilot uses AI. Check for mistakes.
filtered_params = {p: v for p, v in params.items() if p.name in sampler_param_names}
s.resample(param_values=filtered_params)

if constraints is not None:
yield {param: value[i] for param, value in sampled_constraints.items()}
Expand Down
185 changes: 185 additions & 0 deletions test_simultaneous_fit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
"""
Test simultaneous fits with different parameter sharing scenarios.
"""

from __future__ import annotations

import os

os.environ["TQDM_DISABLE"] = "1"

import numpy as np
import pytest

zfit = pytest.importorskip("zfit")
from zfit.loss import UnbinnedNLL # noqa: E402
from zfit.minimize import Minuit # noqa: E402

from hepstats.hypotests.calculators import FrequentistCalculator # noqa: E402
from hepstats.hypotests.parameters import POI, POIarray # noqa: E402


def create_no_shared_params_loss():
    """Simultaneous loss of two Gaussians with completely disjoint parameters.

    Returns:
        tuple: ``(loss, poi_param, null_value, alt_value)`` where ``poi_param``
        is the floating mean of the first channel.
    """
    space_x = zfit.Space("x", limits=(0.0, 3.0))
    mean_x = zfit.Parameter("mu1", 1.0, 0.0, 3.0)
    width_x = zfit.Parameter("sigma1", 0.3)
    width_x.floating = False  # resolution fixed; only the mean floats
    pdf_x = zfit.pdf.Gauss(obs=space_x, mu=mean_x, sigma=width_x)

    space_y = zfit.Space("y", limits=(0.0, 3.0))
    mean_y = zfit.Parameter("mu2", 2.0, 0.0, 3.0)
    width_y = zfit.Parameter("sigma2", 0.3)
    width_y.floating = False
    pdf_y = zfit.pdf.Gauss(obs=space_y, mu=mean_y, sigma=width_y)

    # Draw the two samples in the same order so the global RNG stream matches.
    sample_x = zfit.data.Data.from_numpy(obs=space_x, array=np.random.normal(1.0, 0.3, 500))
    sample_y = zfit.data.Data.from_numpy(obs=space_y, array=np.random.normal(2.0, 0.3, 500))

    nll = UnbinnedNLL(model=pdf_x, data=sample_x) + UnbinnedNLL(model=pdf_y, data=sample_y)
    return nll, mean_x, 1.5, 0.0


def create_all_shared_params_loss():
    """Simultaneous loss of two Gaussians that share every parameter.

    Both channels use the same floating mean (the POI) and the same fixed
    width. Returns ``(loss, poi_param, null_value, alt_value)``.
    """
    shared_mean = zfit.Parameter("mu_shared", 1.5, 0.0, 3.0)
    shared_width = zfit.Parameter("sigma_shared", 0.3)
    shared_width.floating = False  # only the common mean floats

    space_x = zfit.Space("x", limits=(0.0, 3.0))
    space_y = zfit.Space("y", limits=(0.0, 3.0))

    pdf_x = zfit.pdf.Gauss(obs=space_x, mu=shared_mean, sigma=shared_width)
    pdf_y = zfit.pdf.Gauss(obs=space_y, mu=shared_mean, sigma=shared_width)

    sample_x = zfit.data.Data.from_numpy(obs=space_x, array=np.random.normal(1.5, 0.3, 500))
    sample_y = zfit.data.Data.from_numpy(obs=space_y, array=np.random.normal(1.5, 0.3, 500))

    nll = UnbinnedNLL(model=pdf_x, data=sample_x) + UnbinnedNLL(model=pdf_y, data=sample_y)
    return nll, shared_mean, 2.0, 0.0


def create_some_shared_params_loss():
    """Simultaneous loss of two Gaussians that share only the mean.

    The mean (the POI) is common to both channels while each channel keeps
    its own fixed width. Returns ``(loss, poi_param, null_value, alt_value)``.
    """
    common_mean = zfit.Parameter("mu_common", 1.2, 0.0, 3.0)
    width_x = zfit.Parameter("sigma1", 0.3)
    width_y = zfit.Parameter("sigma2", 0.4)
    width_x.floating = False  # widths fixed; only the shared mean floats
    width_y.floating = False

    space_x = zfit.Space("x", limits=(0.0, 3.0))
    space_y = zfit.Space("y", limits=(0.0, 3.0))

    pdf_x = zfit.pdf.Gauss(obs=space_x, mu=common_mean, sigma=width_x)
    pdf_y = zfit.pdf.Gauss(obs=space_y, mu=common_mean, sigma=width_y)

    sample_x = zfit.data.Data.from_numpy(obs=space_x, array=np.random.normal(1.2, 0.3, 500))
    sample_y = zfit.data.Data.from_numpy(obs=space_y, array=np.random.normal(1.2, 0.4, 500))

    nll = UnbinnedNLL(model=pdf_x, data=sample_x) + UnbinnedNLL(model=pdf_y, data=sample_y)
    return nll, common_mean, 1.8, 0.5


def create_mixed_loss():
    """Simultaneous loss over five channels mixing every sharing pattern.

    - Channels 1-2 share both the POI mean and the width.
    - Channel 3 has fully independent parameters.
    - Channels 4-5 share the POI mean but keep per-channel widths.

    Returns ``(loss, poi_param, null_value, alt_value)``.
    """
    poi_mean = zfit.Parameter("mu_shared", 1.5, 0.0, 3.0)
    common_width = zfit.Parameter("sigma_shared", 0.3)
    common_width.floating = False

    terms = []

    # Channels 1-2: everything shared.
    for axis in ("x1", "x2"):
        space = zfit.Space(axis, limits=(0.0, 3.0))
        pdf = zfit.pdf.Gauss(obs=space, mu=poi_mean, sigma=common_width)
        sample = zfit.data.Data.from_numpy(obs=space, array=np.random.normal(1.5, 0.3, 500))
        terms.append(UnbinnedNLL(model=pdf, data=sample))

    # Channel 3: fully independent parameters.
    space3 = zfit.Space("x3", limits=(0.0, 3.0))
    mean3 = zfit.Parameter("mu3", 2.0, 0.0, 3.0)
    width3 = zfit.Parameter("sigma3", 0.35)
    width3.floating = False
    pdf3 = zfit.pdf.Gauss(obs=space3, mu=mean3, sigma=width3)
    sample3 = zfit.data.Data.from_numpy(obs=space3, array=np.random.normal(2.0, 0.35, 500))
    terms.append(UnbinnedNLL(model=pdf3, data=sample3))

    # Channels 4-5: shared mean, per-channel fixed widths.
    for axis, sigma_name, width_val in (("x4", "sigma4", 0.25), ("x5", "sigma5", 0.4)):
        space = zfit.Space(axis, limits=(0.0, 3.0))
        width = zfit.Parameter(sigma_name, width_val)
        width.floating = False
        pdf = zfit.pdf.Gauss(obs=space, mu=poi_mean, sigma=width)
        sample = zfit.data.Data.from_numpy(obs=space, array=np.random.normal(1.5, width_val, 500))
        terms.append(UnbinnedNLL(model=pdf, data=sample))

    # Left-fold the additions exactly as the chained `+` expression would.
    total = terms[0]
    for term in terms[1:]:
        total = total + term
    return total, poi_mean, 2.0, 0.5


@pytest.mark.parametrize(
    "loss_factory",
    [
        create_no_shared_params_loss,
        create_all_shared_params_loss,
        create_some_shared_params_loss,
        create_mixed_loss,
    ],
    ids=["no_shared", "all_shared", "some_shared", "mixed_5losses"],
)
def test_simultaneous_fit_null_toys(loss_factory):
    """Null-hypothesis toys should have best fits clustered at the null value."""
    np.random.seed(42)

    loss, poi_param, null_value, alt_value = loss_factory()
    calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=20, ntoysalt=20)

    null_scan = POIarray(poi_param, [null_value])
    alt_hypothesis = POI(poi_param, alt_value)

    results = calculator.get_toys_null(null_scan, alt_hypothesis, qtilde=False)
    null_toys = results[POI(poi_param, null_value)]

    observed_mean = np.mean(null_toys.bestfit)
    assert np.abs(observed_mean - null_value) < 0.15, (
        f"Expected null toys around {null_value}, got {observed_mean:.3f}"
    )


@pytest.mark.parametrize(
    "loss_factory",
    [
        create_some_shared_params_loss,
        create_mixed_loss,
    ],
    ids=["some_shared", "mixed_5losses"],
)
def test_simultaneous_fit_alt_toys(loss_factory):
    """Alt-hypothesis toys should have best fits clustered at the alt value."""
    np.random.seed(42)

    loss, poi_param, null_value, alt_value = loss_factory()
    calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=20, ntoysalt=20)

    null_scan = POIarray(poi_param, [null_value])
    alt_hypothesis = POI(poi_param, alt_value)

    results = calculator.get_toys_alt(alt_hypothesis, null_scan, qtilde=False)
    alt_toys = results[alt_hypothesis]

    observed_mean = np.mean(alt_toys.bestfit)
    assert np.abs(observed_mean - alt_value) < 0.15, (
        f"Expected alt toys around {alt_value}, got {observed_mean:.3f}"
    )
45 changes: 45 additions & 0 deletions tests/hypotests/test_calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,48 @@ def test_frequentist_calculator_one_poi(constraint):
assert all(is_valid_data(s) for s in samplers)
loss = calc.toys_loss(mean.name)
assert is_valid_loss(loss)


def test_frequentist_calculator_toys_at_correct_values():
    """Regression test: toys must be generated at the specified POI value, not data's best fit.

    Guards against the bug where base_sample used set_values() as a context
    manager while zfit samplers ignore current parameter values — they need
    param_values passed to resample() directly.
    """
    np.random.seed(42)

    space = zfit.Space("x", limits=(0.1, 3.0))
    # Dataset deliberately centered at 1.2 so the data best fit lands there.
    observed = np.random.normal(1.2, 0.3, 500)
    dataset = zfit.data.Data.from_numpy(obs=space, array=observed)

    mean = zfit.Parameter("mu_test", 1.2, 0.0, 3.0)
    width = zfit.Parameter("sigma_test", 0.3)
    width.floating = False

    pdf = zfit.pdf.Gauss(obs=space, mu=mean, sigma=width)
    nll = UnbinnedNLL(model=pdf, data=dataset)

    calc = FrequentistCalculator(nll, Minuit(), ntoysnull=30, ntoysalt=30)

    # Sanity check: fitting the data itself should recover ~1.2.
    fitted_mu = calc.bestfit.params[mean]["value"]
    assert abs(fitted_mu - 1.2) < 0.1, f"Best fit should be ~1.2, got {fitted_mu}"

    # Request null toys at mu=1.8, far from the data's best fit of 1.2.
    poi_null = POIarray(mean, [1.8])
    poi_alt = POI(mean, 0.5)

    toys = calc.get_toys_null(poi_null, poi_alt, qtilde=False)[POI(mean, 1.8)]

    # Critical check: toys generated at mu=1.8 must fit back to ~1.8.
    # If the bug is present, they fit to ~1.2 (the data's best fit) instead.
    mean_bestfit = np.mean(toys.bestfit)
    assert abs(mean_bestfit - 1.8) < 0.2, (
        f"Toys generated at mu=1.8 should have best fits ~1.8, got {mean_bestfit:.3f}. "
        f"If ~1.2, toys are being generated at data's best fit instead of specified POI."
    )
6 changes: 3 additions & 3 deletions tests/modeling/test_bayesianblocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_bayesian_blocks(cmdopt, data_gen):
np.savez(f, be1=be1, be2=be2, be3=be3)
elif cmdopt == "test":
answers = np.load(answer_dir / "answers_bayesian_blocks.npz")
np.testing.assert_array_equal(be1, answers["be1"])
np.testing.assert_array_equal(be2, answers["be2"])
np.testing.assert_array_equal(be3, answers["be3"])
np.testing.assert_allclose(be1, answers["be1"])
np.testing.assert_allclose(be2, answers["be2"])
np.testing.assert_allclose(be3, answers["be3"])
# assert(np.all(output[1] == answers['be']))
Loading