
Commit e420172

Numerical stability issues in gradient for log1mexp (#18)
1 parent a90fb57 · commit e420172

2 files changed: +31 -6 lines


src/klay/torch/utils.py

Lines changed: 5 additions & 6 deletions
@@ -5,18 +5,17 @@
 CUTOFF = -math.log(2)


-def log1mexp(x, eps):
+def log1mexp(x, eps=10e-12):
     """
     Numerically accurate evaluation of log(1 - exp(x)) for x < 0.
     See [Maechler2012accurate]_ for details.
     https://github.com/pytorch/pytorch/issues/39242
     """
     mask = CUTOFF < x  # x < 0
-    return torch.where(
-        mask,
-        (-x.expm1() + eps).log(),
-        (-x.exp() + eps).log1p(),
-    )
+    out = torch.empty_like(x)
+    out[mask] = (-x[mask].expm1() + eps).log()
+    out[~mask] = (-x[~mask].exp() + eps).log1p()
+    return out


 def negate_real(x, eps):
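Why this helps: torch.where evaluates and differentiates both branches for every element. For an input like -1e-10, float32 exp(x) rounds to exactly 1.0, so the unselected log1p branch computes log1p(-1.0) = -inf; during backward, autograd multiplies that branch's infinite local gradient by the zero coming from the mask, and 0 * inf = NaN, which contaminates x.grad even though the forward value was finite (this is the linked PyTorch issue 39242). Writing each branch into out via boolean indexing means the inappropriate branch is never evaluated at all. The CUTOFF = -log(2) split follows [Maechler2012accurate]_: expm1 is the accurate form close to zero, log1p far from it. A minimal sketch of the pitfall (a standalone repro, not code from this commit; eps dropped for brevity):

import torch

# x so close to 0 that float32 exp(x) rounds to exactly 1.0: the
# unselected branch then computes log1p(-1.0) = -inf.
x = torch.tensor(-1e-10, requires_grad=True)
mask = x > -0.6931  # roughly -math.log(2), the CUTOFF above
out = torch.where(mask, (-x.expm1()).log(), (-x.exp()).log1p())
out.backward()
print(out.item())    # finite: forward reads only the selected branch
print(x.grad.item()) # nan: backward still differentiates the -inf branch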

tests/test_gradient_stability.py

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+"""
+Test for numerical stability issues in the backward pass with log probabilities.
+
+This test identifies cases where the forward pass produces finite values but
+the backward pass introduces NaNs, particularly when inputs are close to -inf
+or when log probabilities approach 0 (probability = 1).
+"""
+import torch
+
+from klay.torch.utils import log1mexp
+
+
+def test_log1mexp_gradient_stability():
+    test_cases = [-1e-10, -0.01, -0.1, -1.0, -10.0, -100.0, -1000.0]
+
+    for x in test_cases:
+        x = torch.tensor(x, dtype=torch.float32).requires_grad_(True)
+        out = log1mexp(x)
+
+        assert torch.isfinite(out), f"Output is not finite for {x}."
+        out.backward()
+        assert torch.isfinite(x.grad), f"Gradient is not finite for {x}."
+
+
+if __name__ == "__main__":
+    test_log1mexp_gradient_stability()
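A quick end-to-end check of the patched function at the two extremes the test exercises (a standalone snippet, assuming klay is importable; the full test runs with pytest tests/test_gradient_stability.py):

import torch
from klay.torch.utils import log1mexp

# Boolean-mask assignment evaluates only the appropriate branch per
# element, so neither the output nor the gradient picks up inf or NaN.
x = torch.tensor([-1e-10, -1000.0], requires_grad=True)
log1mexp(x).sum().backward()
print(x.grad)  # both entries finite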
