Commit 738d5a6

Add precision-dependent epsilon configuration
- Add EPS global constant for numerical stability operations
- Add DEFAULT_EPS_VALUES_PROB and DEFAULT_EPS_VALUES_LOGPROB dicts with precision-specific defaults (float16, bfloat16, float32, float64)
- Add set_eps(eps) function to configure epsilon globally
- Remove eps parameters from all functions (log1mexp, negate_real, layers, forward methods); use the global EPS constant instead
- Export set_eps, EPS, and default value dicts from klay.torch
1 parent 1bfbeb5 commit 738d5a6
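
As a quick illustration of the new configuration API, here is a minimal usage sketch based on the commit message above (it assumes klay is installed and that `klay.torch` exports these names, per the last bullet):

```python
import torch
from klay.torch import set_eps, DEFAULT_EPS_VALUES_PROB, DEFAULT_EPS_VALUES_LOGPROB

# Pick an epsilon floor matched to the working precision and value space;
# probabilities tolerate a coarser floor than log-probabilities.
set_eps(DEFAULT_EPS_VALUES_LOGPROB[torch.float32])  # 1e-16, the built-in default

# Half precision needs a much larger floor:
# set_eps(DEFAULT_EPS_VALUES_PROB[torch.float16])   # 1e-4
```

Note that `set_eps` rebinds a module-level global, so it should be called before the value is read; names already bound via `from ... import EPS` keep the value they captured at import time.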

3 files changed (+58, -27 lines)

src/klay/torch/__init__.py (14 additions, 8 deletions)

```diff
@@ -2,7 +2,13 @@
 from torch import nn
 
 from .layers import ProbabilisticCircuitLayer, get_semiring
-from .utils import unroll_ixs
+from .utils import (
+    unroll_ixs,
+    set_eps,
+    EPS,
+    DEFAULT_EPS_VALUES_PROB,
+    DEFAULT_EPS_VALUES_LOGPROB,
+)
 
 
 def _create_layers(sum_layer, prod_layer, ixs_in, ixs_out):
@@ -24,13 +30,13 @@ def __init__(self, ixs_in, ixs_out, semiring='real'):
         get_semiring(semiring, self.is_probabilistic())
         self.layers = _create_layers(self.sum_layer, self.prod_layer, ixs_in, ixs_out)
 
-    def forward(self, x_pos, x_neg=None, eps=0):
-        x = self.encode_input(x_pos, x_neg, eps)
+    def forward(self, x_pos, x_neg=None):
+        x = self.encode_input(x_pos, x_neg)
         return self.layers(x)
 
-    def encode_input(self, pos, neg, eps):
+    def encode_input(self, pos, neg):
         if neg is None:
-            neg = self.negate(pos, eps)
+            neg = self.negate(pos)
         x = torch.stack([pos, neg], dim=1).flatten()
         units = torch.tensor([self.zero, self.one], dtype=torch.float32, device=pos.device)
         return torch.cat([units, x])
@@ -41,14 +47,14 @@ def sparsity(self, nb_vars: int) -> float:
         dense_params = sum(layer_widths[i] * layer_widths[i + 1] for i in range(len(layer_widths) - 1))
         return sparse_params / dense_params
 
-    def to_pc(self, x_pos, x_neg=None, eps=0):
+    def to_pc(self, x_pos, x_neg=None):
         """ Converts the circuit into a probabilistic circuit."""
         assert self.semiring == "log" or self.semiring == "real"
         pc = ProbabilisticCircuitModule([], [], self.semiring)
         print("Making PC", pc.sum_layer, pc.sum_layer)
         layers = []
 
-        x = self.encode_input(x_pos, x_neg, eps)
+        x = self.encode_input(x_pos, x_neg)
         for i, layer in enumerate(self.layers):
             if isinstance(layer, self.sum_layer):
                 new_layer = pc.sum_layer(layer.ix_in, layer.ix_out)
@@ -76,7 +82,7 @@ def sample(self):
         return y[2::2]
 
     def condition(self, x_pos, x_neg):
-        x = self.encode_input(x_pos, x_neg, None)
+        x = self.encode_input(x_pos, x_neg)
         for layer in self.layers:
             x = layer.condition(x) \
                 if isinstance(layer, ProbabilisticCircuitLayer) \
```
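
To make the `encode_input` layout concrete: it interleaves positive and negative literal values and prepends the semiring's unit constants. A standalone sketch for the 'real' semiring (the 0./1. units and complement negation are assumptions for that semiring, matching `negate_real` in utils.py):

```python
import torch

pos = torch.tensor([0.9, 0.2])                # per-variable P(x_i = 1)
neg = 1 - pos                                 # negate_real in the 'real' semiring
x = torch.stack([pos, neg], dim=1).flatten()  # interleaved: [p0, n0, p1, n1]
units = torch.tensor([0., 1.])                # the semiring's zero and one
print(torch.cat([units, x]))                  # -> [0.0, 1.0, 0.9, 0.1, 0.2, 0.8]
```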

src/klay/torch/layers.py (15 additions, 15 deletions)

```diff
@@ -1,7 +1,7 @@
 import torch
 from torch import nn
 
-from .utils import negate_real, log1mexp
+from .utils import negate_real, log1mexp, EPS
 
 
 class CircuitLayer(nn.Module):
@@ -14,7 +14,7 @@ def __init__(self, ix_in, ix_out):
 
     def _scatter_forward(self, x: torch.Tensor, reduce: str, **kwargs):
         if reduce == "logsumexp":
-            return self._scatter_logsumexp_forward(x, **kwargs)
+            return self._scatter_logsumexp_forward(x)
         output = torch.empty(self.out_shape, dtype=x.dtype, device=x.device)
         output = torch.scatter_reduce(output, 0, index=self.ix_out, src=x, reduce=reduce, include_self=False)
         return output
@@ -31,9 +31,9 @@ def _safe_exp(self, x: torch.Tensor):
         x.nan_to_num_(nan=0., posinf=float('inf'), neginf=float('-inf'))
         return torch.exp(x), max_output
 
-    def _scatter_logsumexp_forward(self, x: torch.Tensor, eps: float):
+    def _scatter_logsumexp_forward(self, x: torch.Tensor):
         x, max_output = self._safe_exp(x)
-        output = torch.full(self.out_shape, eps, dtype=x.dtype, device=x.device)
+        output = torch.full(self.out_shape, EPS, dtype=x.dtype, device=x.device)
         output = torch.scatter_add(output, 0, index=self.ix_out, src=x)
         output = torch.log(output) + max_output
         return output
```
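
`_scatter_logsumexp_forward` is where the global EPS now enters: it floors the scatter-sum accumulator so the final log never sees an exact zero. A self-contained sketch of the same max-shifted pattern (`scatter_logsumexp` and its flat-index signature are illustrative, not klay's API; klay splits the max-shift into `_safe_exp`):

```python
import torch

EPS = 1e-16  # stand-in for the global constant in klay.torch.utils

def scatter_logsumexp(src: torch.Tensor, index: torch.Tensor, out_size: int) -> torch.Tensor:
    # 1. Per-output maximum, so exp() below cannot overflow.
    max_out = torch.full((out_size,), float("-inf"))
    max_out = torch.scatter_reduce(max_out, 0, index=index, src=src, reduce="amax", include_self=True)
    # 2. Scatter-add the shifted exponentials into an EPS-floored accumulator.
    acc = torch.full((out_size,), EPS)
    acc = torch.scatter_add(acc, 0, index=index, src=torch.exp(src - max_out[index]))
    # 3. Undo the shift in log space.
    return torch.log(acc) + max_out

src = torch.tensor([0.1, 0.2, 0.3, 0.4]).log()
index = torch.tensor([0, 0, 1, 1])  # edges 0-1 feed node 0; edges 2-3 feed node 1
print(scatter_logsumexp(src, index, 2).exp())  # -> approximately [0.3, 0.7]
```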
```diff
@@ -63,8 +63,8 @@ def forward(self, x):
 
 
 class LogSumLayer(CircuitLayer):
-    def forward(self, x, eps=10e-16):
-        return self._scatter_forward(x[self.ix_in], "logsumexp", eps=eps)
+    def forward(self, x):
+        return self._scatter_forward(x[self.ix_in], "logsumexp")
 
 
 class ProbabilisticCircuitLayer(CircuitLayer):
@@ -79,15 +79,15 @@ def get_edge_weights(self):
 
     def renorm_weights(self, x):
         with torch.no_grad():
-            self.weights.data = self.get_log_edge_weights(0) + x
+            self.weights.data = self.get_log_edge_weights() + x
 
-    def get_log_edge_weights(self, eps):
-        norm = self._scatter_logsumexp_forward(self.weights, eps)
+    def get_log_edge_weights(self):
+        norm = self._scatter_logsumexp_forward(self.weights)
         return self.weights - norm[self.ix_out]
 
-    def sample(self, y, eps=10e-16):
-        weights = self.get_log_edge_weights(eps)
-        noise = -(-torch.log(torch.rand_like(weights) + eps) + eps).log()
+    def sample(self, y):
+        weights = self.get_log_edge_weights()
+        noise = -(-torch.log(torch.rand_like(weights) + EPS) + EPS).log()
         gumbels = weights + noise
         samples = self._scatter_forward(gumbels, "amax")
         samples = samples[self.ix_out] == gumbels
@@ -107,9 +107,9 @@ def condition(self, x):
 
 
 class ProbabilisticLogSumLayer(ProbabilisticCircuitLayer):
-    def forward(self, x, eps=10e-16):
-        x = self.get_log_edge_weights(eps) + x[self.ix_in]
-        return self._scatter_logsumexp_forward(x, eps)
+    def forward(self, x):
+        x = self.get_log_edge_weights() + x[self.ix_in]
+        return self._scatter_logsumexp_forward(x)
 
     def condition(self, x):
         y = self.forward(x)
```
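
The `sample` method above uses the Gumbel-max trick: adding Gumbel(0, 1) noise, g = -log(-log u), to the log edge weights and taking a per-node `amax` draws an edge with probability proportional to its weight. A standalone check of that identity, with the same EPS-guarded noise as in the diff (the 4-edge weight vector is illustrative):

```python
import torch

EPS = 1e-16
torch.manual_seed(0)

log_w = torch.log(torch.tensor([0.1, 0.2, 0.3, 0.4]))  # one sum node, four edges

u = torch.rand(20_000, 4)
noise = -(-torch.log(u + EPS) + EPS).log()  # Gumbel(0, 1), as in sample()
winners = (log_w + noise).argmax(dim=1)     # winning edge per draw

print(torch.bincount(winners, minlength=4) / 20_000)  # -> approximately [0.1, 0.2, 0.3, 0.4]
```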

src/klay/torch/utils.py (29 additions, 4 deletions)

```diff
@@ -2,10 +2,35 @@
 
 import torch
 
+
+# Default epsilon values for different precisions
+DEFAULT_EPS_VALUES_PROB = {
+    torch.float16: 1e-4,
+    torch.bfloat16: 1e-3,
+    torch.float32: 1e-8,
+    torch.float64: 1e-15,
+}
+
+DEFAULT_EPS_VALUES_LOGPROB = {
+    torch.float16: 1e-4,
+    torch.bfloat16: 1e-3,
+    torch.float32: 1e-16,
+    torch.float64: 1e-30,
+}
+
+# Global epsilon constant - used for all numerical stability operations
+EPS = 1e-16
+
 CUTOFF = -math.log(2)
 
 
-def log1mexp(x, eps):
+def set_eps(eps: float):
+    """Set global epsilon value for numerical stability in operations."""
+    global EPS
+    EPS = eps
+
+
+def log1mexp(x):
     """
     Numerically accurate evaluation of log(1 - exp(x)) for x < 0.
     See [Maechler2012accurate]_ for details.
@@ -14,12 +39,12 @@ def log1mexp(x, eps):
     mask = CUTOFF < x  # x < 0
     return torch.where(
         mask,
-        (-x.expm1() + eps).log(),
-        (-x.exp() + eps).log1p(),
+        (-x.expm1() + EPS).log(),
+        (-x.exp() + EPS).log1p(),
     )
 
 
-def negate_real(x, eps):
+def negate_real(x):
     return 1 - x
 
 
```
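
For reference, `log1mexp` follows Mächler (2012): above CUTOFF = -log 2 it computes log(-expm1(x)), below it log1p(-exp(x)), picking whichever form keeps precision; the global EPS only guards the argument of log/log1p. A quick standalone check of why both branches matter (values chosen so float32 shows the failure of the naive form):

```python
import math
import torch

CUTOFF = -math.log(2)
EPS = 1e-16

def log1mexp(x: torch.Tensor) -> torch.Tensor:
    # Mirrors the patched utils.log1mexp, with the global EPS inlined.
    return torch.where(
        CUTOFF < x,
        (-x.expm1() + EPS).log(),   # x near 0: expm1 avoids 1 - exp(x) cancelling
        (-x.exp() + EPS).log1p(),   # x << 0: log1p keeps the tiny result
    )

x = torch.tensor([-1e-8, -20.0])    # one value on each side of CUTOFF
print(log1mexp(x))                  # -> approximately [-18.42, -2.06e-09]
print(torch.log(1 - torch.exp(x)))  # -> [-inf, 0.]: the naive form fails at both ends
```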
