Commit c3e251b: Merge pull request #12 from edahelsinki/select_dists

Select variables when using plot_dist

2 parents (2b4014e + 41e2629)

16 files changed: +248 -170 lines changed
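
The headline change lets callers pick which variables plot_dist shows. The files implementing it are among the nine not included in the excerpt below, so the following is only a hypothetical usage sketch: the variables keyword (and that it takes column indices) is an assumption inferred from the PR title, not confirmed by this diff.

import numpy as np
import slise

# Toy data: 100 items, 5 features (sketch only).
X = np.random.randn(100, 5)
y = X @ np.array([2.0, 0.0, -1.0, 0.0, 0.5]) + 0.1 * np.random.randn(100)

model = slise.regression(X, y, epsilon=0.1, lambda1=0.01)
model.plot_dist()                  # previously: distributions for all variables
model.plot_dist(variables=[0, 2])  # after this PR: only selected variables (assumed keyword)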

.github/workflows/python-publish.yml (+12 -13)

@@ -15,16 +15,15 @@ jobs:
       id-token: write

     steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python
-        uses: actions/setup-python@v3
-        with:
-          python-version: "3.x"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install build
-      - name: Build package
-        run: python -m build
-      - name: Publish package
-        uses: pypa/[email protected]
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip build
+      - name: Build package
+        run: python -m build
+      - name: Publish package to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

.github/workflows/python-pytest.yml (+36 -11)

@@ -12,24 +12,49 @@ on:
   workflow_dispatch:

 jobs:
-  build:
+  test:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
-
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+          cache: "pip"
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install pytest build
-          python -m pip install .
-      - name: Build package
-        run: python -m build
+          python -m pip install --upgrade pip pytest pytest-cov
+          python -m pip install -e .
       - name: Test with pytest
-        run: pytest
+        run: |
+          pytest -k test_optim
+          NUMBA_DISABLE_JIT=1 pytest --cov-report term --cov=slise/ --cov-fail-under=9
+
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - run: python -m pip install --upgrade pip build
+      - name: Build package
+        run: |
+          python -m build
+          python -c "import os, glob; assert os.path.getsize(sorted(glob.glob('dist/slise-*.whl'))[-1]) > 10_000"
+
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - run: python -m pip install --upgrade pip ruff
+      - name: Lint with Ruff
+        run: |
+          ruff check --output-format=github
+          ruff format --check
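
The build job's inline python -c wheel check is dense; unpacked, it is a simple size sanity check on the most recently built wheel. The same logic as a readable standalone script (reformatted only, no new behaviour):

import glob
import os

# Collect built wheels; with a single fresh build the sorted list's
# last entry is the wheel that was just produced.
wheels = sorted(glob.glob("dist/slise-*.whl"))

# Fail CI if the wheel is suspiciously small (under 10 kB), which would
# suggest the package contents were accidentally left out of the build.
assert os.path.getsize(wheels[-1]) > 10_000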

pyproject.toml (+2 -2)

@@ -1,6 +1,6 @@
 [project]
 name = "slise"
-version = "2.2.3"
+version = "2.2.4"
 authors = [{ name = "Anton Björklund", email = "[email protected]" }]
 description = "The SLISE algorithm for robust regression and explanations of black box models"
 readme = "README.md"

@@ -28,7 +28,7 @@ dependencies = [
 ]

 [project.optional-dependencies]
-dev = ["pytest", "black[jupyter]", "pylint", "IPython"]
+dev = ["pytest", "pytest-cov", "black[jupyter]", "pylint", "IPython", "ruff"]
 tbb = ["tbb"]

 [project.urls]

slise/__init__.py (+44 -40)

@@ -1,49 +1,53 @@
(The whole module docstring is touched by whitespace-only reformatting, so it is shown once below as context; the substantive changes are the appended third reference and the new # noqa markers.)

 """
 SLISE - Sparse Linear Subset Explanations
 -----------------------------------------

 The SLISE algorithm can be used for both robust regression and to explain outcomes from black box models.
 See [slise.slise.regression][] and [slise.slise.explain][] for referense.


 In robust regression we fit regression models that can handle data that
 contains outliers. SLISE accomplishes this by fitting a model such that
 the largest possible subset of the data items have an error less than a
 given value. All items with an error larger than that are considered
 potential outliers and do not affect the resulting model.

 SLISE can also be used to provide local model-agnostic explanations for
 outcomes from black box models. To do this we replace the ground truth
 response vector with the predictions from the complex model. Furthermore, we
 force the model to fit a selected item (making the explanation local). This
 gives us a local approximation of the complex model with a simpler linear
 model. In contrast to other methods SLISE creates explanations using real
 data (not some discretised and randomly sampled data) so we can be sure that
 all inputs are valid (i.e. in the correct data manifold, and follows the
 constraints used to generate the data, e.g., the laws of physics).


 More in-depth details about the algorithm can be found in the papers:

 Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K.
 Sparse Robust Regression for Explaining Classifiers.
 Discovery Science (DS 2019).
 Lecture Notes in Computer Science, vol 11828, Springer.
 https://doi.org/10.1007/978-3-030-33778-0_27

 Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K.
 Robust regression via error tolerance.
 Data Mining and Knowledge Discovery (2022).
 https://doi.org/10.1007/s10618-022-00819-2
+
+Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K.
+Explaining any black box model using real data.
+Frontiers in Computer Science 5:1143904 (2023).
+https://doi.org/10.3389/fcomp.2023.1143904
 """

-from slise.slise import (
+from slise.slise import (  # noqa: F401
     SliseRegression,
     regression,
     SliseExplainer,
     explain,
     SliseWarning,
 )
-from slise.utils import limited_logit as logit
-from slise.data import normalise_robust
+from slise.utils import limited_logit as logit  # noqa: F401
+from slise.data import normalise_robust  # noqa: F401
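
For orientation, here is a minimal usage sketch of the two entry points the docstring references. The keyword names (epsilon, lambda1, x, y) follow the package's documented examples, but treat the exact signatures as assumptions and check the API reference:

import numpy as np
import slise

# Synthetic data: a linear signal plus noise.
X = np.random.randn(200, 3)
y = X @ np.array([1.0, -2.0, 0.0]) + 0.1 * np.random.randn(200)

# Robust regression: fit the largest subset with error at most epsilon.
model = slise.regression(X, y, epsilon=0.1, lambda1=0.01)

# Local explanation: approximate a model around one item, here using the
# observed response in place of a black box prediction for brevity.
explanation = slise.explain(X, y, epsilon=0.1, x=X[0], y=y[0])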

slise/data.py (+1 -1)

@@ -1,5 +1,5 @@ (whitespace-only change)
 """
-This script contains functions for modifying data, mainly normalisation and PCA.
+This script contains functions for modifying data, mainly normalisation and PCA.
 """

 from typing import NamedTuple, Tuple, Union, Optional

slise/initialisation.py (+2 -4)

@@ -1,5 +1,5 @@ (whitespace-only change)
 """
-This script contains functions for initialising alpha and beta in SLISE.
+This script contains functions for initialising alpha and beta in SLISE.
 """

 from math import log

@@ -122,9 +122,7 @@ def initialise_zeros(
     """
     epsilon = epsilon**2
     beta_max = min(beta_max, beta_max_init) / epsilon
-    beta = next_beta(
-        Y**2, epsilon, 0, weight, beta_max, log(max_approx), min_beta_step
-    )
+    beta = next_beta(Y**2, epsilon, 0, weight, beta_max, log(max_approx), min_beta_step)
     return np.zeros(X.shape[1]), beta

slise/optimisation.py (+9 -11)

@@ -1,5 +1,5 @@ (whitespace-only change)
 """
-This script contains the loss functions and optimisation functions for SLISE.
+This script contains the loss functions and optimisation functions for SLISE.
 """

 from math import log

@@ -502,11 +502,11 @@ def regularised_regression(
     lambda2 = float(lambda2)
     assert X.shape[0] == len(Y), f"Different lengths {X.shape[0]} != {len(Y)}"
     if weight is None:
-        lf = lambda alpha: _ridge_numba(alpha, X, Y, lambda2)
+        lf = lambda alpha: _ridge_numba(alpha, X, Y, lambda2)  # noqa: E731
     else:
         weight = np.ascontiguousarray(weight, dtype=np.float64)
         assert Y.shape == weight.shape, f"Different shapes {Y.shape} != {weight.shape}"
-        lf = lambda alpha: _ridge_numbaw(alpha, X, Y, lambda2, weight)
+        lf = lambda alpha: _ridge_numbaw(alpha, X, Y, lambda2, weight)  # noqa: E731
     return owlqn(lf, np.zeros(X.shape[1], dtype=np.float64), lambda1, max_iterations)

@@ -547,11 +547,11 @@ def optimise_loss(
     epsilon = float(epsilon)
     beta = float(beta)
     if weight is None:
-        lf = lambda alpha: _loss_grad(alpha, X, Y, epsilon, beta, lambda2)
+        lf = lambda alpha: _loss_grad(alpha, X, Y, epsilon, beta, lambda2)  # noqa: E731
     else:
         weight = np.ascontiguousarray(weight, dtype=np.float64)
         assert Y.shape == weight.shape, f"Different shapes {Y.shape} != {weight.shape}"
-        lf = lambda alpha: _loss_gradw(alpha, X, Y, epsilon, beta, lambda2, weight)
+        lf = lambda alpha: _loss_gradw(alpha, X, Y, epsilon, beta, lambda2, weight)  # noqa: E731
     return owlqn(lf, alpha, lambda1, max_iterations)

@@ -576,8 +576,8 @@ def log_approximation_ratio(
     """
     if beta1 >= beta2:
         return 0
-    log_f = lambda r, beta: log_sigmoid(beta * (epsilon2 - r))
-    dlog_g = lambda r: -beta1 * dlog_sigmoid(
+    log_f = lambda r, beta: log_sigmoid(beta * (epsilon2 - r))  # noqa: E731
+    dlog_g = lambda r: -beta1 * dlog_sigmoid(  # noqa: E731
         beta1 * (epsilon2 - r)
     ) + beta2 * dlog_sigmoid(beta2 * (epsilon2 - r))
     if dlog_g(0) < 0:

@@ -628,7 +628,7 @@ def next_beta(
     if log_approx <= log_max_approx:
         return beta_max
     else:
-        f = (
+        f = (  # noqa: E731
             lambda b: log_approximation_ratio(residuals2, epsilon2, beta, b, weight)
             - log_max_approx
         )

@@ -681,9 +681,7 @@ def _debug_log(
     """
     residuals = (X @ alpha - Y) ** 2
     loss = loss_sharp(alpha, X, Y, epsilon, lambda1, lambda2, weight)
-    bloss = loss_residuals(
-        alpha, residuals, epsilon**2, beta, lambda1, lambda2, weight
-    )
+    bloss = loss_residuals(alpha, residuals, epsilon**2, beta, lambda1, lambda2, weight)
     epss = matching_epsilon(residuals, epsilon**2, beta, weight)
     beta = beta * epsilon**2
     print(
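
A note on the recurring # noqa: E731 markers: Ruff's E731 rule ("do not assign a lambda expression, use a def") would otherwise flag every lf = lambda ... assignment above. The lambdas are kept, presumably for brevity, and the rule is silenced per line. A minimal illustration of the two equivalent forms, using placeholder names:

# What E731 flags: a lambda bound to a name...
square = lambda x: x * x  # noqa: E731

# ...versus the rule-compliant def with identical behaviour.
def square_def(x):
    return x * x

assert square(3) == square_def(3) == 9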
