Skip to content

Commit 2cf3ca2

Browse files
Anton Björklund (Aggrathon)
authored and committed
fix explanation prediction logit bugs
1 parent 4b533e3 commit 2cf3ca2

File tree

3 files changed

+58
-42
lines changed

3 files changed

+58
-42
lines changed

slise/plot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ def fill_density(ax, X, x, n):
396396
if np.var(X) == 0:
397397
X = np.random.normal(X[0], 1e-8, len(X))
398398
kde1 = gaussian_kde(X, 0.2)
399-
if np.any(subset):
399+
if np.sum(subset) > 1:
400400
kde2 = gaussian_kde(X[subset], 0.2)
401401
else:
402402
kde2 = lambda x: x * 0

slise/slise.py

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from warnings import warn
99

1010
import numpy as np
11-
from matplotlib.pyplot import Figure
11+
from matplotlib.pyplot import Figure, yscale
1212
from scipy.special import expit as sigmoid
1313

1414
from slise.data import (
@@ -73,17 +73,17 @@ def regression(
7373
SliseRegression: Object containing the regression result.
7474
"""
7575
return SliseRegression(
76-
epsilon,
77-
lambda1,
78-
lambda2,
79-
intercept,
80-
normalise,
81-
initialisation,
82-
beta_max,
83-
max_approx,
84-
max_iterations,
85-
debug,
86-
).fit(X, Y, weight, init)
76+
epsilon=epsilon,
77+
lambda1=lambda1,
78+
lambda2=lambda2,
79+
intercept=intercept,
80+
normalise=normalise,
81+
initialisation=initialisation,
82+
beta_max=beta_max,
83+
max_approx=max_approx,
84+
max_iterations=max_iterations,
85+
debug=debug,
86+
).fit(X=X, Y=Y, weight=weight, init=init)
8787

8888

8989
def explain(
@@ -143,19 +143,19 @@ def explain(
143143
SliseExplainer: Object containing the explanation.
144144
"""
145145
return SliseExplainer(
146-
X,
147-
Y,
148-
epsilon,
149-
lambda1,
150-
lambda2,
151-
normalise,
152-
logit,
153-
initialisation,
154-
beta_max,
155-
max_approx,
156-
max_iterations,
157-
debug,
158-
).explain(x, y, weight, init)
146+
X=X,
147+
Y=Y,
148+
epsilon=epsilon,
149+
lambda1=lambda1,
150+
lambda2=lambda2,
151+
normalise=normalise,
152+
logit=logit,
153+
initialisation=initialisation,
154+
beta_max=beta_max,
155+
max_approx=max_approx,
156+
max_iterations=max_iterations,
157+
debug=debug,
158+
).explain(x=x, y=y, weight=weight, init=init)
159159

160160

161161
class SliseRegression:
@@ -276,9 +276,9 @@ def fit(
276276
alpha, beta = initialise_fixed(init, X, Y, self.epsilon, self._weight)
277277
# Optimisation
278278
alpha = graduated_optimisation(
279-
alpha,
280-
X,
281-
Y,
279+
alpha=alpha,
280+
X=X,
281+
Y=Y,
282282
epsilon=self.epsilon,
283283
beta=beta,
284284
lambda1=self.lambda1,
@@ -588,7 +588,10 @@ def __init__(
588588
if X.shape[1] == X2.shape[1]:
589589
x_cols = None
590590
X, x_center, x_scale = normalise_robust(X2)
591-
Y, y_center, y_scale = normalise_robust(Y)
591+
if logit:
592+
(y_center, y_scale) = (0, 1)
593+
else:
594+
Y, y_center, y_scale = normalise_robust(Y)
592595
self._scale = DataScaling(x_center, x_scale, y_center, y_scale, x_cols)
593596
else:
594597
self._scale = None
@@ -645,9 +648,9 @@ def explain(
645648
else:
646649
alpha, beta = initialise_fixed(init, X, Y, self.epsilon, self._weight)
647650
alpha = graduated_optimisation(
648-
alpha,
649-
X,
650-
Y,
651+
alpha=alpha,
652+
X=X,
653+
Y=Y,
651654
epsilon=self.epsilon,
652655
beta=beta,
653656
lambda1=self.lambda1,
@@ -663,8 +666,11 @@ def explain(
663666
)
664667
self._alpha = alpha
665668
if self._normalise:
669+
y = self._y
670+
if self._logit:
671+
y = limited_logit(y)
666672
alpha2 = self._scale.unscale_model(alpha)
667-
alpha2[0] = self._y - np.sum(self._x * alpha2[1:])
673+
alpha2[0] = y - np.sum(self._x * alpha2[1:])
668674
self._coefficients = alpha2
669675
else:
670676
self._coefficients = alpha
@@ -708,7 +714,7 @@ def predict(self, X: Union[np.ndarray, None] = None) -> np.ndarray:
708714
Y = mat_mul_inter(self._X, self._coefficients)
709715
else:
710716
Y = mat_mul_inter(X, self._coefficients)
711-
if self.scaler.logit:
717+
if self._logit:
712718
Y = sigmoid(Y)
713719
return Y
714720

tests/test_slise.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from warnings import catch_warnings
2+
23
import numpy as np
4+
from pytest import approx
35
from scipy.special import expit as sigmoid
4-
5-
from slise.optimisation import loss_smooth
6-
from slise.data import add_intercept_column, scale_same
6+
from slise import explain, regression
7+
from slise.data import add_intercept_column
78
from slise.initialisation import (
89
initialise_candidates,
910
initialise_candidates2,
@@ -12,7 +13,7 @@
1213
initialise_ols,
1314
initialise_zeros,
1415
)
15-
from slise import regression, explain
16+
from slise.optimisation import loss_smooth
1617
from slise.utils import mat_mul_inter
1718

1819
from .utils import *
@@ -170,6 +171,7 @@ def test_slise_reg():
170171

171172
def test_slise_exp():
172173
print("Testing slise explanation")
174+
np.random.seed(49)
173175
X, Y, mod = data_create2(100, 5)
174176
Y2 = sigmoid(Y)
175177
w = np.random.uniform(size=100) + 0.5
@@ -178,31 +180,39 @@ def test_slise_exp():
178180
reg = explain(X, Y, 0.1, x, y, lambda1=1e-4, lambda2=1e-4, normalise=True)
179181
reg.print()
180182
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
183+
assert y == approx(reg.predict(x))
181184
assert 1.0 >= reg.subset().mean() > 0.0
182-
reg = explain(X, Y, 0.1, 19, lambda1=0.01, lambda2=0.01, normalise=True)
185+
reg = explain(X, Y, 0.1, 17, lambda1=0.01, lambda2=0.01, normalise=True)
183186
reg.print()
184187
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
188+
assert Y[17] == approx(reg.predict(X[17]))
185189
assert 1.0 >= reg.subset().mean() > 0.0
186190
reg = explain(X, Y, 0.1, x, y, lambda1=0.01, lambda2=0.01, normalise=False)
187191
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
192+
assert y == approx(reg.predict(x))
188193
assert 1.0 >= reg.subset().mean() > 0.0
189194
reg = explain(X, Y, 0.1, x, y, lambda1=0, lambda2=0, normalise=False)
190195
reg.print()
191196
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
197+
assert y == approx(reg.predict(x))
192198
assert 1.0 >= reg.subset().mean() > 0.0
193-
reg = explain(X, Y, 0.1, 19, lambda1=0.01, lambda2=0.01, normalise=False)
199+
reg = explain(X, Y, 0.1, 18, lambda1=0.01, lambda2=0.01, normalise=False)
194200
reg.print()
195201
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
202+
assert Y[18] == approx(reg.predict(X[18]))
196203
assert 1.0 >= reg.subset().mean() > 0.0
197204
reg = explain(X, Y, 0.1, 19, lambda1=0, lambda2=0, normalise=False)
198205
reg.print()
199206
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
207+
assert Y[19] == approx(reg.predict(X[19]))
200208
assert 1.0 >= reg.subset().mean() > 0.0
201209
reg = explain(X, Y, 0.1, 19, lambda1=0.01, lambda2=0.01, weight=w, normalise=False)
202210
reg.print()
203211
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
212+
assert Y[19] == approx(reg.predict(X[19]))
204213
assert 1.0 >= reg.subset().mean() > 0.0
205-
reg = explain(X, Y2, 0.5, 19, weight=w, normalise=True, logit=True)
214+
reg = explain(X, Y2, 0.5, 20, weight=w, normalise=True, logit=True)
206215
reg.print()
207216
assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})"
217+
assert Y2[20] == approx(reg.predict(X[20]))
208218
assert 1.0 >= reg.subset().mean() > 0.0

0 commit comments

Comments (0)