feat: add option to return partial results in rejection samplers (#1720)

janfb · web-flow · commit fea84219d8b7 · 2026-01-23T18:47:39.000+01:00
* feat: add return_partial_on_timeout option to rejection samplers

* review comments
diff --git a/sbi/inference/posteriors/direct_posterior.py b/sbi/inference/posteriors/direct_posterior.py
@@ -140,6 +140,7 @@ def sample(
         show_progress_bars: bool = True,
         reject_outside_prior: bool = True,
         max_sampling_time: Optional[float] = None,
+        return_partial_on_timeout: bool = False,
     ) -> Tensor:
         r"""Draw samples from the approximate posterior distribution $p(\theta|x)$.
 
@@ -159,6 +160,10 @@ def sample(
                 If exceeded, sampling is aborted and a RuntimeError is raised. Only
                 applies when `reject_outside_prior=True` (no effect otherwise since
                 direct sampling is fast).
+            return_partial_on_timeout: If True and `max_sampling_time` is exceeded,
+                return the samples collected so far instead of raising a RuntimeError.
+                A warning will be issued. Only applies when `reject_outside_prior=True`
+                (default).
         """
         num_samples = torch.Size(sample_shape).numel()
         x = self._x_else_default_x(x)
@@ -191,6 +196,7 @@ def sample(
                 proposal_sampling_kwargs={"condition": x},
                 alternative_method="build_posterior(..., sample_with='mcmc')",
                 max_sampling_time=max_sampling_time,
+                return_partial_on_timeout=return_partial_on_timeout,
             )[0]
         else:
             # Bypass rejection sampling entirely.
@@ -210,6 +216,7 @@ def sample_batched(
         show_progress_bars: bool = True,
         reject_outside_prior: bool = True,
         max_sampling_time: Optional[float] = None,
+        return_partial_on_timeout: bool = False,
     ) -> Tensor:
         r"""Draw samples from the posteriors for a batch of different xs.
 
@@ -230,6 +237,9 @@ def sample_batched(
             max_sampling_time: Optional maximum allowed sampling time in seconds.
                 If exceeded, sampling is aborted and a RuntimeError is raised. Only
                 applies when `reject_outside_prior=True`.
+            return_partial_on_timeout: If True and `max_sampling_time` is exceeded,
+                return the samples collected so far instead of raising a RuntimeError.
+                A warning will be issued. Only applies when `reject_outside_prior=True`.
 
         Returns:
             Samples from the posteriors of shape (*sample_shape, B, *input_shape)
@@ -276,6 +286,7 @@ def sample_batched(
                 proposal_sampling_kwargs={"condition": x},
                 alternative_method="build_posterior(..., sample_with='mcmc')",
                 max_sampling_time=max_sampling_time,
+                return_partial_on_timeout=return_partial_on_timeout,
             )[0]
         else:
             # Bypass rejection sampling entirely.
diff --git a/sbi/inference/posteriors/rejection_posterior.py b/sbi/inference/posteriors/rejection_posterior.py
@@ -138,6 +138,7 @@ def sample(
         show_progress_bars: bool = True,
         reject_outside_prior: bool = True,
         max_sampling_time: Optional[float] = None,
+        return_partial_on_timeout: bool = False,
     ):
         r"""Draw samples from the approximate posterior via rejection sampling.
 
@@ -164,6 +165,10 @@ def sample(
                 If exceeded, sampling is aborted and a RuntimeError is raised. Only
                 applies when `reject_outside_prior=True` (no effect otherwise since
                 direct sampling from the proposal is fast).
+            return_partial_on_timeout: If True and `max_sampling_time` is exceeded,
+                return the samples collected so far instead of raising a RuntimeError.
+                A warning will be issued. Only applies when `reject_outside_prior=True`
+                (default).
 
         Returns:
             Samples from posterior.
@@ -203,6 +208,7 @@ def sample(
                 num_iter_to_find_max=num_iter_to_find_max,
                 m=m,
                 max_sampling_time=max_sampling_time,
+                return_partial_on_timeout=return_partial_on_timeout,
                 device=self._device,
             )
         else:
diff --git a/sbi/inference/posteriors/vector_field_posterior.py b/sbi/inference/posteriors/vector_field_posterior.py
@@ -164,6 +164,7 @@ def sample(
         show_progress_bars: bool = True,
         reject_outside_prior: bool = True,
         max_sampling_time: Optional[float] = None,
+        return_partial_on_timeout: bool = False,
     ) -> Tensor:
         r"""Return samples from posterior distribution $p(\theta|x)$.
 
@@ -209,6 +210,10 @@ def sample(
                 If exceeded, sampling is aborted and a RuntimeError is raised. Only
                 applies when `reject_outside_prior=True` (no effect otherwise since
                 direct sampling does not use rejection).
+            return_partial_on_timeout: If True and `max_sampling_time` is exceeded,
+                return the samples collected so far instead of raising a RuntimeError.
+                A warning will be issued. Only applies when `reject_outside_prior=True`
+                (default).
         """
 
         if sample_with is None:
@@ -235,6 +240,7 @@ def sample(
                     show_progress_bars=show_progress_bars,
                     max_sampling_batch_size=max_sampling_batch_size,
                     max_sampling_time=max_sampling_time,
+                    return_partial_on_timeout=return_partial_on_timeout,
                 )
             else:
                 # Bypass rejection sampling entirely.
@@ -259,6 +265,7 @@ def sample(
                     max_sampling_batch_size=max_sampling_batch_size,
                     proposal_sampling_kwargs=proposal_sampling_kwargs,
                     max_sampling_time=max_sampling_time,
+                    return_partial_on_timeout=return_partial_on_timeout,
                 )
             else:
                 # Bypass rejection sampling entirely.
@@ -459,6 +466,7 @@ def sample_batched(
         show_progress_bars: bool = True,
         reject_outside_prior: bool = True,
         max_sampling_time: Optional[float] = None,
+        return_partial_on_timeout: bool = False,
     ) -> Tensor:
         r"""Given a batch of observations [x_1, ..., x_B] this function samples from
         posteriors $p(\theta|x_1)$, ... ,$p(\theta|x_B)$, in a batched (i.e. vectorized)
@@ -488,6 +496,9 @@ def sample_batched(
             max_sampling_time: Optional maximum allowed sampling time in seconds.
                 If exceeded, sampling is aborted and a RuntimeError is raised. Only
                 applies when `reject_outside_prior=True`.
+            return_partial_on_timeout: If True and `max_sampling_time` is exceeded,
+                return the samples collected so far instead of raising a RuntimeError.
+                A warning will be issued. Only applies when `reject_outside_prior=True`.
 
         Returns:
             Samples from the posteriors of shape (*sample_shape, B, *input_shape)
@@ -525,6 +536,7 @@ def sample_batched(
                     show_progress_bars=show_progress_bars,
                     max_sampling_batch_size=max_sampling_batch_size,
                     max_sampling_time=max_sampling_time,
+                    return_partial_on_timeout=return_partial_on_timeout,
                 )
             else:
                 # Bypass rejection sampling.
@@ -553,6 +565,7 @@ def sample_batched(
                     max_sampling_batch_size=max_sampling_batch_size,
                     proposal_sampling_kwargs=proposal_sampling_kwargs,
                     max_sampling_time=max_sampling_time,
+                    return_partial_on_timeout=return_partial_on_timeout,
                 )
             else:
                 # Bypass rejection sampling.
diff --git a/sbi/samplers/rejection/rejection.py b/sbi/samplers/rejection/rejection.py
@@ -26,6 +26,7 @@ def rejection_sample(
     num_iter_to_find_max: int = 100,
     m: float = 1.2,
     max_sampling_time: Optional[float] = None,
+    return_partial_on_timeout: bool = False,
     device: str = "cpu",
 ) -> Tuple[Tensor, Tensor]:
     r"""Return samples from a `potential_fn` obtained via rejection sampling.
@@ -57,11 +58,14 @@ def rejection_sample(
             value will ensure that the samples are indeed from the correct
             distribution, but will increase the fraction of rejected samples and thus
             computation time.
-        device: Device on which to sample.
         max_sampling_time: Optional maximum allowed sampling time (in seconds).
             If this time is exceeded, rejection sampling is aborted and a RuntimeError
-            is raised. This prevents jobs from stalling indefinitely when the
-            acceptance rate is extremely low.
+            is raised (unless `return_partial_on_timeout=True`). This prevents jobs
+            from stalling indefinitely when the acceptance rate is extremely low.
+        return_partial_on_timeout: If True and `max_sampling_time` is exceeded, return
+            the samples collected so far (with a warning) instead of raising a
+            RuntimeError, unless none were collected yet. Default is False.
+        device: Device on which to sample.
 
     Returns:
         Accepted samples and acceptance rate as scalar Tensor.
@@ -143,6 +147,16 @@ def log_prob(self, theta: Tensor, **kwargs) -> Tensor:
                 max_sampling_time is not None
                 and (time.time() - start_time) > max_sampling_time
             ):
+                num_collected = sum(s.shape[0] for s in accepted)
+                if return_partial_on_timeout and num_collected > 0:
+                    pbar.close()
+                    warnings.warn(
+                        f"Timeout exceeded after collecting {num_collected}/"
+                        f"{num_samples} samples. Returning partial results.",
+                        stacklevel=2,
+                    )
+                    samples = torch.cat(accepted)
+                    return samples, as_tensor(acceptance_rate)
                 raise RuntimeError(
                     "Sampling aborted early because rejection sampling exceeded "
                     "max_sampling_time. This is likely due to extremely low "
@@ -225,6 +239,7 @@ def accept_reject_sample(
     proposal_sampling_kwargs: Optional[Dict] = None,
     alternative_method: Optional[str] = None,
     max_sampling_time: Optional[float] = None,
+    return_partial_on_timeout: bool = False,
     **kwargs,
 ) -> Tuple[Tensor, Tensor]:
     r"""Returns samples from a proposal according to a acception criterion.
@@ -264,12 +279,16 @@ def accept_reject_sample(
         alternative_method: An alternative method for sampling from the restricted
             proposal. E.g., for SNPE, we suggest to sample with MCMC if the rejection
             rate is too high. Used only for printing during a potential warning.
+        max_sampling_time: Optional maximum allowed sampling time (in seconds).
+            If exceeded, the sampling loop is interrupted and a RuntimeError is raised
+            unless `return_partial_on_timeout=True`. This prevents infinite or
+            excessively slow rejection sampling runs, e.g. in cases of heavy leakage
+            or extremely low acceptance rates.
+        return_partial_on_timeout: If True and `max_sampling_time` is exceeded, return
+            the samples collected so far (with a warning) instead of raising a
+            RuntimeError, unless any batch entry has none yet. Default is False.
         kwargs: Absorb additional unused arguments that can be passed to
             `rejection_sample()`. Warn if not empty.
-        max_sampling_time: Optional maximum allowed sampling time (in seconds).
-            If exceeded, the sampling loop is interrupted and a RuntimeError is raised.
-            This prevents infinite or excessively slow rejection sampling runs, e.g.
-            in cases of heavy leakage or extremely low acceptance rates.
 
     Returns:
         Accepted samples of shape `(sample_dim, batch_dim, *event_shape)`, and
@@ -318,6 +337,24 @@ def accept_reject_sample(
             max_sampling_time is not None
             and (time.time() - start_time) > max_sampling_time
         ):
+            # Check if we have any samples collected
+            num_collected = min(
+                sum(s.shape[0] for s in accepted[i]) for i in range(num_xos)
+            )
+            if return_partial_on_timeout and num_collected > 0:
+                pbar.close()
+                warnings.warn(
+                    f"Timeout exceeded after collecting {num_collected}/{num_samples}"
+                    f" samples. Returning partial results.",
+                    stacklevel=2,
+                )
+                # Return partial samples with proper shape
+                samples = [
+                    torch.cat(accepted[i], dim=0)[:num_collected]
+                    for i in range(num_xos)
+                ]
+                samples = torch.stack(samples, dim=1)
+                return samples, as_tensor(acceptance_rate, device=samples.device)
             raise RuntimeError(
                 "Sampling aborted early because rejection sampling exceeded "
                 "max_sampling_time. This is likely due to extremely low "
diff --git a/tests/rejection_sampling_test.py b/tests/rejection_sampling_test.py
@@ -123,6 +123,31 @@ def to(self, device):
     assert torch.all(no_reject == 5.0)
 
 
+def test_accept_reject_sample_partial_return():
+    """Test that return_partial_on_timeout returns collected samples."""
+
+    def accept_rare_fn(x):
+        # Accept only 1% of samples to ensure we don't finish
+        return torch.rand(x.shape[0]) < 0.01
+
+    proposal = DummyProposal()
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
+        samples, _ = accept_reject_sample(
+            proposal=proposal,
+            accept_reject_fn=accept_rare_fn,
+            num_samples=10000,  # Request many samples
+            max_sampling_time=0.001,  # Very short timeout
+            return_partial_on_timeout=True,
+        )
+        # Should have some samples (not all 10000)
+        assert samples.shape[0] > 0
+        assert samples.shape[0] < 10000
+        # Exactly one partial-results warning; unrelated warnings are ignored
+        messages = [str(item.message).lower() for item in w]
+        assert sum("partial results" in msg for msg in messages) == 1
+
+
 def test_warn_if_outside_prior_support():
     """Test the warning utility for samples outside prior support."""
     prior = Uniform(torch.zeros(2), torch.ones(2))