@@ -860,7 +860,7 @@ class GenPareto(Continuous):
860860 constraint is the upper wall, ``max(data) < mu - sigma/xi``, which rearranges to
861861 a *lower* bound on the shape, ``xi > -sigma / (max(data) - mu)`` (there is no
862862 upper bound -- :math:`\xi \geq 0` is an unbounded tail that always contains the
863- data). The shape is *also* floored at ``xi > -1``: for :math:`\xi \leq -1` the
863+ data). The shape is *also* floored at ``xi > -1``: for :math:`\xi < -1` the
864864 density diverges at the upper endpoint, making the likelihood unbounded (an
865865 improper posterior NUTS runs away to) -- ``xi > -1`` is the usual regularity
866866 condition. Encoding both as the lower bound of an otherwise broad prior on
@@ -880,11 +880,11 @@ class GenPareto(Continuous):
880880 assert pareto_mu < xmin # exceedances lie strictly above the threshold
881881
882882 with pm.Model():
883- # sigma carries the data's units, so a broad prior on log(sigma) makes
884- # the shape xi inference invariant to rescaling the data.
883+ # sigma carries the data's units; a broad prior on log(sigma) keeps the
884+ # shape xi inference robust across many orders of magnitude of data scale .
885885 pareto_sigma = pm.LogNormal("pareto_sigma", mu=0.0, sigma=10.0)
886886 # xi floored at the data-in-support bound -sigma/(xmax-mu) AND at -1
887- # (xi <= -1 diverges the density at the upper wall -> unbounded likelihood).
887+ # (xi < -1 diverges the density at the upper wall -> unbounded likelihood).
888888 pareto_xi = pm.TruncatedNormal(
889889 "pareto_xi",
890890 mu=0.0,
@@ -1067,12 +1067,12 @@ class ExtGenPareto(Continuous):
10671067 assert pareto_mu < xmin # strict: a kappa < 1 density diverges at x = mu
10681068
10691069 with pm.Model():
1070- # sigma carries the data's units, so a broad prior on log(sigma) makes
1071- # the shape xi inference invariant to rescaling the data.
1070+ # sigma carries the data's units; a broad prior on log(sigma) keeps the
1071+ # shape xi inference robust across many orders of magnitude of data scale .
10721072 pareto_sigma = pm.LogNormal("pareto_sigma", mu=0.0, sigma=10.0)
10731073 pareto_kappa = pm.Gamma("pareto_kappa", alpha=2, beta=1)
10741074 # xi floored at the data-in-support bound -sigma/(xmax-mu) AND at -1
1075- # (xi <= -1 diverges the density at the upper wall -> unbounded likelihood).
1075+ # (xi < -1 diverges the density at the upper wall -> unbounded likelihood).
10761076 pareto_xi = pm.TruncatedNormal(
10771077 "pareto_xi",
10781078 mu=0.0,
@@ -1093,7 +1093,7 @@ class ExtGenPareto(Continuous):
10931093 ``kappa < 1`` density diverges; fixing the threshold keeps that singularity away
10941094 from the data. A *free* ``mu`` slides up to ``min(data)`` to sit on the
10951095 divergence -- an unbounded likelihood / improper posterior, the lower-endpoint
1096- mirror of the ``xi <= -1`` upper wall (NUTS pins ``mu`` to ``min(data)`` with
1096+ mirror of the ``xi < -1`` upper wall (NUTS pins ``mu`` to ``min(data)`` with
10971097 ``kappa < 1``). If the threshold must be estimated, floor ``kappa >= 1`` (no
10981098 lower divergence) or put a prior on ``min(data) - mu`` that vanishes at 0.
10991099 """
@@ -1342,35 +1342,33 @@ class _ExtGenParetoPIT(_GPDProbabilityIntegralTransform):
13421342
13431343 @staticmethod
13441344 def _excess_from_y (value , mu , sigma , xi , kappa ):
1345- # m = -log(S_F), the GPD-survival exponent, recovered from y = logit(F_ext).
1345+ # m = -log(1 - H), the GPD-survival exponent, from the carrier inverse
1346+ # H = F_ext ** (1/kappa) = exp(-a), a = -log(F_ext)/kappa >= 0, and
1347+ # y = logit(F_ext). So m = -log(1 - exp(-a)) = -log1mexp(-a).
13461348 #
1347- # Bulk (value < cutoff): m = -log(1 - F_ext ** (1/kappa)) from log F_ext =
1348- # -softplus(-y), via the shared log1mexp inverse so a tiny GPD survival (the
1349- # small-kappa regime) is not rounded away to 0. Exact, so it round-trips for
1350- # every kappa down to where the carrier underflows (see the class docstring's
1351- # kappa note).
1349+ # Bulk (value < cutoff): a from log F_ext = -softplus(-y); the log1mexp keeps a
1350+ # tiny GPD survival from rounding to 0 in the small-kappa regime.
13521351 #
1353- # Tail (value >= cutoff): log F_ext = -softplus(-y) rounds to 0 once exp(-y)
1354- # underflows, sending m -> inf though the quantile is finite. There
1355- # S_ext = exp(-t) is tiny (t = softplus(y)) and S_F ~ S_ext / kappa, so
1356- # m -> t + log(kappa) - log(_log1p_div(-exp(-t))),
1357- # built without forming exp(-y) before a log, finite until t overflows. The
1358- # two branches agree to machine precision at the switch, and both run on
1359- # clamped inputs so the discarded one (and its gradient) stays finite.
1352+ # Tail (value >= cutoff): -softplus(-y) underflows to 0, so carry log(a) instead
1353+ # of a. With S_ext = exp(-t), t = softplus(y),
1354+ # -log(F_ext) = -log1p(-S_ext) = S_ext * _log1p_div(-S_ext),
1355+ # so log_a = - t + log(_log1p_div(-S_ext)) - log(kappa), finite until t overflows.
1356+ # m = -log1mexp(-a), with a -> 0 (m ~ -log_a) and a -> inf (m ~ 0) split out so a
1357+ # itself never over/underflows. Both branches run on clamped inputs so the
1358+ # discarded one (and its gradient) stays finite.
13601359 #
1361- # The crossover is where exp(-y) underflows, which tracks the dtype's exponent
1362- # range (~700 for float64, ~80 for float32) -- derive it from finfo rather than
1363- # hard-coding 700, which under float32 would route y in ~[80, 700) through the
1364- # bulk branch where log F_ext has already underflowed (m -> +inf). float64 is
1365- # unchanged: min(700, 708.4 - 8) = 700.
1360+ # cutoff = where exp(-y) underflows, dtype-aware (~700 float64, ~80 float32);
1361+ # float64 is min(700, 708.4 - 8) = 700.
13661362 cutoff = np .asarray (
13671363 min (700.0 , float (- np .log (np .finfo (value .dtype ).tiny )) - 8.0 ), dtype = value .dtype
13681364 )
13691365 t = pt .softplus (value )
13701366 log_F = - pt .softplus (- pt .minimum (value , cutoff ))
13711367 m_bulk = _ext_gpd_excess_from_log_prob (log_F , kappa )
1372- s = pt .exp (- pt .maximum (t , cutoff ))
1373- m_tail = t + pt .log (kappa ) - pt .log (_log1p_div (- s ))
1368+ s = pt .exp (- pt .maximum (t , cutoff )) # S_ext, clamped so _log1p_div stays finite
1369+ log_a = - t + pt .log (_log1p_div (- s )) - pt .log (kappa )
1370+ a = pt .exp (pt .minimum (log_a , 700.0 ))
1371+ m_tail = pt .switch (log_a < - 36.0 , - log_a , - pt .log1mexp (- a ))
13741372 return pt .switch (value < cutoff , m_bulk , m_tail )
13751373
13761374 @staticmethod
0 commit comments