Skip to content

Commit 235d0de

Browse files
maresbclaude
andcommitted
Fix ExtGPD transform tail for subnormal kappa; tighten doc claims
_ExtGenParetoPIT._excess_from_y used the asymptotic S_F ~ S_ext/kappa in the tail branch, valid only when exp(-y)/kappa << 1. For subnormal kappa that ratio is O(1) at representable y (e.g. y=710, kappa=4e-309: exp(-y)/kappa ~ 1.1), so it returned a negative "excess" (-0.112 vs the true 0.395), collapsing backward onto the floor and breaking the round-trip (forward(backward(710)) = 703.5, not 710). Invert the carrier exactly instead: a = -log(F_ext)/kappa, m = -log(1 - exp(-a)), carried as log_a = -t + log(_log1p_div(-S_ext)) - log(kappa) so it survives S_ext underflow, with the a->0 (m ~ -log_a) and a->inf (m ~ 0) limits split out. This preserves the large-kappa tail (m ~ y + log kappa) and fixes the subnormal case; regression test asserts excess(710, 4e-309) ~ 0.3954 and a clean round-trip. Doc fixes: the GPD wall density diverges for xi < -1, not xi <= -1 (xi = -1 is the finite uniform boundary); the LogNormal(0,10) sigma prior is robust across scales, not scale-invariant (its location is fixed in the data's units). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 40f3f8d commit 235d0de

2 files changed

Lines changed: 50 additions & 33 deletions

File tree

pymc_extras/distributions/continuous.py

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ class GenPareto(Continuous):
860860
constraint is the upper wall, ``max(data) < mu - sigma/xi``, which rearranges to
861861
a *lower* bound on the shape, ``xi > -sigma / (max(data) - mu)`` (there is no
862862
upper bound -- :math:`\xi \geq 0` is an unbounded tail that always contains the
863-
data). The shape is *also* floored at ``xi > -1``: for :math:`\xi \leq -1` the
863+
data). The shape is *also* floored at ``xi > -1``: for :math:`\xi < -1` the
864864
density diverges at the upper endpoint, making the likelihood unbounded (an
865865
improper posterior NUTS runs away to) -- ``xi > -1`` is the usual regularity
866866
condition. Encoding both as the lower bound of an otherwise broad prior on
@@ -880,11 +880,11 @@ class GenPareto(Continuous):
880880
assert pareto_mu < xmin # exceedances lie strictly above the threshold
881881
882882
with pm.Model():
883-
# sigma carries the data's units, so a broad prior on log(sigma) makes
884-
# the shape xi inference invariant to rescaling the data.
883+
# sigma carries the data's units; a broad prior on log(sigma) keeps the
884+
# shape xi inference robust across many orders of magnitude of data scale.
885885
pareto_sigma = pm.LogNormal("pareto_sigma", mu=0.0, sigma=10.0)
886886
# xi floored at the data-in-support bound -sigma/(xmax-mu) AND at -1
887-
# (xi <= -1 diverges the density at the upper wall -> unbounded likelihood).
887+
# (xi < -1 diverges the density at the upper wall -> unbounded likelihood).
888888
pareto_xi = pm.TruncatedNormal(
889889
"pareto_xi",
890890
mu=0.0,
@@ -1067,12 +1067,12 @@ class ExtGenPareto(Continuous):
10671067
assert pareto_mu < xmin # strict: a kappa < 1 density diverges at x = mu
10681068
10691069
with pm.Model():
1070-
# sigma carries the data's units, so a broad prior on log(sigma) makes
1071-
# the shape xi inference invariant to rescaling the data.
1070+
# sigma carries the data's units; a broad prior on log(sigma) keeps the
1071+
# shape xi inference robust across many orders of magnitude of data scale.
10721072
pareto_sigma = pm.LogNormal("pareto_sigma", mu=0.0, sigma=10.0)
10731073
pareto_kappa = pm.Gamma("pareto_kappa", alpha=2, beta=1)
10741074
# xi floored at the data-in-support bound -sigma/(xmax-mu) AND at -1
1075-
# (xi <= -1 diverges the density at the upper wall -> unbounded likelihood).
1075+
# (xi < -1 diverges the density at the upper wall -> unbounded likelihood).
10761076
pareto_xi = pm.TruncatedNormal(
10771077
"pareto_xi",
10781078
mu=0.0,
@@ -1093,7 +1093,7 @@ class ExtGenPareto(Continuous):
10931093
``kappa < 1`` density diverges; fixing the threshold keeps that singularity away
10941094
from the data. A *free* ``mu`` slides up to ``min(data)`` to sit on the
10951095
divergence -- an unbounded likelihood / improper posterior, the lower-endpoint
1096-
mirror of the ``xi <= -1`` upper wall (NUTS pins ``mu`` to ``min(data)`` with
1096+
mirror of the ``xi < -1`` upper wall (NUTS pins ``mu`` to ``min(data)`` with
10971097
``kappa < 1``). If the threshold must be estimated, floor ``kappa >= 1`` (no
10981098
lower divergence) or put a prior on ``min(data) - mu`` that vanishes at 0.
10991099
"""
@@ -1342,35 +1342,33 @@ class _ExtGenParetoPIT(_GPDProbabilityIntegralTransform):
13421342

13431343
@staticmethod
13441344
def _excess_from_y(value, mu, sigma, xi, kappa):
1345-
# m = -log(S_F), the GPD-survival exponent, recovered from y = logit(F_ext).
1345+
# m = -log(1 - H), the GPD-survival exponent, from the carrier inverse
1346+
# H = F_ext ** (1/kappa) = exp(-a), a = -log(F_ext)/kappa >= 0, and
1347+
# y = logit(F_ext). So m = -log(1 - exp(-a)) = -log1mexp(-a).
13461348
#
1347-
# Bulk (value < cutoff): m = -log(1 - F_ext ** (1/kappa)) from log F_ext =
1348-
# -softplus(-y), via the shared log1mexp inverse so a tiny GPD survival (the
1349-
# small-kappa regime) is not rounded away to 0. Exact, so it round-trips for
1350-
# every kappa down to where the carrier underflows (see the class docstring's
1351-
# kappa note).
1349+
# Bulk (value < cutoff): a from log F_ext = -softplus(-y); the log1mexp keeps a
1350+
# tiny GPD survival from rounding to 0 in the small-kappa regime.
13521351
#
1353-
# Tail (value >= cutoff): log F_ext = -softplus(-y) rounds to 0 once exp(-y)
1354-
# underflows, sending m -> inf though the quantile is finite. There
1355-
# S_ext = exp(-t) is tiny (t = softplus(y)) and S_F ~ S_ext / kappa, so
1356-
# m -> t + log(kappa) - log(_log1p_div(-exp(-t))),
1357-
# built without forming exp(-y) before a log, finite until t overflows. The
1358-
# two branches agree to machine precision at the switch, and both run on
1359-
# clamped inputs so the discarded one (and its gradient) stays finite.
1352+
# Tail (value >= cutoff): -softplus(-y) underflows to 0, so carry log(a) instead
1353+
# of a. With S_ext = exp(-t), t = softplus(y),
1354+
# -log(F_ext) = -log1p(-S_ext) = S_ext * _log1p_div(-S_ext),
1355+
# so log_a = -t + log(_log1p_div(-S_ext)) - log(kappa), finite until t overflows.
1356+
# m = -log1mexp(-a), with a -> 0 (m ~ -log_a) and a -> inf (m ~ 0) split out so a
1357+
# itself never over/underflows. Both branches run on clamped inputs so the
1358+
# discarded one (and its gradient) stays finite.
13601359
#
1361-
# The crossover is where exp(-y) underflows, which tracks the dtype's exponent
1362-
# range (~700 for float64, ~80 for float32) -- derive it from finfo rather than
1363-
# hard-coding 700, which under float32 would route y in ~[80, 700) through the
1364-
# bulk branch where log F_ext has already underflowed (m -> +inf). float64 is
1365-
# unchanged: min(700, 708.4 - 8) = 700.
1360+
# cutoff = where exp(-y) underflows, dtype-aware (~700 float64, ~80 float32);
1361+
# float64 is min(700, 708.4 - 8) = 700.
13661362
cutoff = np.asarray(
13671363
min(700.0, float(-np.log(np.finfo(value.dtype).tiny)) - 8.0), dtype=value.dtype
13681364
)
13691365
t = pt.softplus(value)
13701366
log_F = -pt.softplus(-pt.minimum(value, cutoff))
13711367
m_bulk = _ext_gpd_excess_from_log_prob(log_F, kappa)
1372-
s = pt.exp(-pt.maximum(t, cutoff))
1373-
m_tail = t + pt.log(kappa) - pt.log(_log1p_div(-s))
1368+
s = pt.exp(-pt.maximum(t, cutoff)) # S_ext, clamped so _log1p_div stays finite
1369+
log_a = -t + pt.log(_log1p_div(-s)) - pt.log(kappa)
1370+
a = pt.exp(pt.minimum(log_a, 700.0))
1371+
m_tail = pt.switch(log_a < -36.0, -log_a, -pt.log1mexp(-a))
13741372
return pt.switch(value < cutoff, m_bulk, m_tail)
13751373

13761374
@staticmethod

tests/distributions/test_continuous.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,10 @@ def test_logcdf(self):
221221
# invalid-xi probe (there is no invalid xi) while still exercising every listed
222222
# value -- the exponential limit xi = 0 and both tails. Two deliberate bounds on
223223
# the range:
224-
# * strictly > -1: at xi <= -1 the GPD becomes (sub-)uniform with a *finite*
225-
# density at its closed upper endpoint, a measure-zero point where the
226-
# open-support convention here (-inf at the wall) legitimately differs from
227-
# SciPy. ``TestGenParetoBoundaries`` covers the xi < 0 wall directly.
224+
# * strictly > -1: at the closed upper endpoint the open-support convention here
225+
# (-inf at the wall) legitimately differs from SciPy -- a measure-zero point --
226+
# and for xi < -1 the density there even diverges. ``TestGenParetoBoundaries``
227+
# covers the xi < 0 wall directly.
228228
# * <= 1: a heavier tail (e.g. xi = 5) pushes the q = 0.99 quantile to ~1e10,
229229
# where ``check_icdf``'s *absolute* tolerance fails on a value that is in
230230
# fact correct to ~1e-15 relative -- false precision, not a real error.
@@ -1167,9 +1167,28 @@ def test_excess_from_y_upper_tail_is_finite_under_float32(self):
11671167
for yi in (90.0, 200.0, 700.0, 5000.0):
11681168
m = float(fn(np.float32(yi)))
11691169
assert np.isfinite(m)
1170-
# m ~ y + log(kappa) far out in the tail (S_F ~ S_ext / kappa).
1170+
# m ~ y + log(kappa) far out in the tail (S_ext / kappa << 1 there).
11711171
np.testing.assert_allclose(m, yi + np.log(2.0), rtol=1e-3)
11721172

1173+
def test_excess_from_y_resolves_subnormal_kappa_tail(self):
1174+
# For subnormal kappa, exp(-y)/kappa is O(1) in the tail, so the carrier
1175+
# survival must be inverted exactly (m = -log(1 - F_ext ** (1/kappa))) -- a
1176+
# S_ext/kappa << 1 asymptotic returns a negative "excess" and collapses
1177+
# backward onto the floor. Reference m(y=710, kappa=4e-309, xi=0) = 0.3953903.
1178+
y = pt.dscalar("y")
1179+
excess = float(
1180+
pytensor.function([y], _ExtGenParetoPIT._excess_from_y(y, 0.0, 1.0, 0.0, 4e-309))(710.0)
1181+
)
1182+
assert excess > 0.0
1183+
np.testing.assert_allclose(excess, 0.395390331, rtol=1e-4)
1184+
with pm.Model() as model:
1185+
x = ExtGenPareto("x", mu=0.0, sigma=1.0, xi=0.0, kappa=4e-309)
1186+
tr = model.rvs_to_transforms[x]
1187+
inputs = x.owner.inputs
1188+
yv = model.value_vars[0]
1189+
roundtrip = pytensor.function([yv], tr.forward(tr.backward(yv, *inputs), *inputs))
1190+
np.testing.assert_allclose(float(roundtrip(710.0)), 710.0, rtol=1e-5)
1191+
11731192
def test_jacobian_gradient_is_continuous_through_xi_zero(self):
11741193
# The headline reason for the probability-integral transform: with xi a
11751194
# random variable, the transformed logp must be C1 in xi across 0. An

0 commit comments

Comments
 (0)