From 78ef281968d7e0ad1441e0af30fec63760a5c497 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Sun, 16 Aug 2020 12:13:10 -0700 Subject: [PATCH 01/24] migrate gaussian distr to subclass of BaseDistribution --- pyglmnet/distributions.py | 80 +++++++++++++++++++++++ pyglmnet/metrics.py | 9 ++- pyglmnet/pyglmnet.py | 130 +++++++++++++++++++++++--------------- pyglmnet/utils.py | 10 ++- 4 files changed, 171 insertions(+), 58 deletions(-) create mode 100644 pyglmnet/distributions.py diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py new file mode 100644 index 00000000..8a96c96e --- /dev/null +++ b/pyglmnet/distributions.py @@ -0,0 +1,80 @@ +"""Exponential family distributions for GLM estimators.""" + +from abc import ABC, abstractmethod +import numpy as np + + +class BaseDistribution(ABC): + """Base class for distributions.""" + + def __init__(self): + """init.""" + pass + + @abstractmethod + def mu(self, z): + """Inverse link function.""" + pass + + @abstractmethod + def grad_mu(self, z): + """Gradient of inverse link.""" + pass + + @abstractmethod + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + pass + + def grad_log_likelihood(self): + """Gradient of L2-penalized log likelihood.""" + msg = (f"""Gradients of log likelihood are not specified for {self.__class__} distribution""") # noqa + raise NotImplementedError(msg) + + def gradhess_log_likelihood_1d(self): + """One-dimensional Gradient and Hessian of log likelihood.""" + msg = (f"""Gradients and Hessians of 1-d log likelihood are not specified for {self.__class__} distribution""") # noqa + raise NotImplementedError(msg) + + def _z(self, beta0, beta, X): + """Compute z to be passed through non-linearity.""" + return beta0 + np.dot(X, beta) + + +class Gaussian(BaseDistribution): + """Base class for distributions.""" + + def __init__(self): + """init.""" + pass + + def mu(self, z): + """Inverse link function.""" + mu = z + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = np.ones_like(z) + return grad_mu + + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + log_likelihood = -0.5 * np.sum((y - y_hat) ** 2) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + grad_beta0 = np.sum((mu - y) * grad_mu) + grad_beta = np.dot((mu - y).T, X * grad_mu[:, None]).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + """One-dimensional Gradient and Hessian of log likelihood.""" + z = self._z(beta0, beta, X) + gk = np.sum((z - y) * xk) + hk = np.sum(xk * xk) + return gk, hk diff --git a/pyglmnet/metrics.py b/pyglmnet/metrics.py index 27bd1080..60225b9f 100644 --- a/pyglmnet/metrics.py +++ b/pyglmnet/metrics.py @@ -28,7 +28,8 @@ def deviance(y, yhat, distr, theta): else: LS = 0 - L1 = _logL(distr, y, yhat, theta=theta) + # L1 = _logL(distr, y, yhat, theta=theta) + L1 = distr.log_likelihood(y, yhat) score = -2 * (L1 - LS) return score @@ -60,8 +61,10 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): else: LS = 0 - L0 = _logL(distr, y, ynull_, theta=theta) - L1 = _logL(distr, y, yhat, theta=theta) + # L0 = _logL(distr, y, ynull_, theta=theta) + # L1 = _logL(distr, y, yhat, theta=theta) + L0 = distr.log_likelihood(y, ynull_) + L1 = distr.log_likelihood(y, yhat) if distr in ['softplus', 'poisson', 'neg-binomial']: score = (1 - (LS - L1) / (LS - L0)) diff --git 
a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 9e00a9c1..5a18950a 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -14,10 +14,22 @@ from .externals.sklearn.utils import check_random_state, check_array, check_X_y from .externals.sklearn.utils.validation import check_is_fitted +from .distributions import BaseDistribution, Gaussian + ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] +def _get_distr(distr): + if isinstance(distr, BaseDistribution): + return distr + + elif distr == "gaussian": + distr = Gaussian() + + return distr + + def _probit_g1(z, pdfz, cdfz, thresh=5): res = np.zeros_like(z) res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) @@ -244,51 +256,59 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, beta, Tau = np.eye(beta.shape[0]) InvCov = np.dot(Tau.T, Tau) - z = _z(beta[0], beta[1:], X, fit_intercept) - mu = _mu(distr, z, eta, fit_intercept) - grad_mu = _grad_mu(distr, z, eta) - - grad_beta0 = 0. - if distr in ['poisson', 'softplus']: - if fit_intercept: - grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) - grad_beta = ((np.dot(grad_mu.T, X) - - np.dot((y * grad_mu / mu).T, X)).T) - - elif distr == 'gaussian': - if fit_intercept: - grad_beta0 = np.sum((mu - y) * grad_mu) - grad_beta = np.dot((mu - y).T, X * grad_mu[:, None]).T - - elif distr == 'binomial': - if fit_intercept: - grad_beta0 = np.sum(mu - y) - grad_beta = np.dot((mu - y).T, X).T - - elif distr == 'probit': - grad_logl = (y * _probit_g3(z, grad_mu, mu) - - (1 - y) * _probit_g4(z, grad_mu, mu)) - if fit_intercept: - grad_beta0 = -np.sum(grad_logl) - grad_beta = -np.dot(grad_logl.T, X).T + # z = _z(beta[0], beta[1:], X, fit_intercept) + # mu = _mu(distr, z, eta, fit_intercept) + # grad_mu = _grad_mu(distr, z, eta) - elif distr == 'gamma': - nu = 1. - grad_logl = (y / mu ** 2 - 1 / mu) * grad_mu - if fit_intercept: - grad_beta0 = -nu * np.sum(grad_logl) - grad_beta = -nu * np.dot(grad_logl.T, X).T - - elif distr == 'neg-binomial': - partial_beta_0 = grad_mu * ((theta + y) / (mu + theta) - y / mu) - - if fit_intercept: - grad_beta0 = np.sum(partial_beta_0) - - grad_beta = np.dot(partial_beta_0.T, X) + if fit_intercept: + beta0_, beta_ = beta[0], beta[1:] + else: + beta0_, beta_ = 0., beta + grad_beta0, grad_beta = distr.grad_log_likelihood(X, y, beta0_, beta_) + grad_beta0 = grad_beta0 if fit_intercept else 0. + + # grad_beta0 = 0. + # if distr in ['poisson', 'softplus']: + # if fit_intercept: + # grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) + # grad_beta = ((np.dot(grad_mu.T, X) - + # np.dot((y * grad_mu / mu).T, X)).T) + # + # elif distr == 'gaussian': + # if fit_intercept: + # grad_beta0 = np.sum((mu - y) * grad_mu) + # grad_beta = np.dot((mu - y).T, X * grad_mu[:, None]).T + # + # elif distr == 'binomial': + # if fit_intercept: + # grad_beta0 = np.sum(mu - y) + # grad_beta = np.dot((mu - y).T, X).T + # + # elif distr == 'probit': + # grad_logl = (y * _probit_g3(z, grad_mu, mu) - + # (1 - y) * _probit_g4(z, grad_mu, mu)) + # if fit_intercept: + # grad_beta0 = -np.sum(grad_logl) + # grad_beta = -np.dot(grad_logl.T, X).T + # + # elif distr == 'gamma': + # nu = 1. 
+ # grad_logl = (y / mu ** 2 - 1 / mu) * grad_mu + # if fit_intercept: + # grad_beta0 = -nu * np.sum(grad_logl) + # grad_beta = -nu * np.dot(grad_logl.T, X).T + # + # elif distr == 'neg-binomial': + # partial_beta_0 = grad_mu * ((theta + y) / (mu + theta) - y / mu) + # + # if fit_intercept: + # grad_beta0 = np.sum(partial_beta_0) + # + # grad_beta = np.dot(partial_beta_0.T, X) grad_beta0 *= 1. / n_samples grad_beta *= 1. / n_samples + if fit_intercept: grad_beta += reg_lambda * (1 - alpha) * np.dot(InvCov, beta[1:]) g = np.zeros((n_features + 1, )) @@ -300,7 +320,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, beta, return g - def _gradhess_logloss_1d(distr, xk, y, z, eta, theta, fit_intercept=True): """ Compute gradient (1st derivative) @@ -495,8 +514,8 @@ class GLM(BaseEstimator): Parameters ---------- - distr: str - distribution family can be one of the following + distr: str or object subclassed from BaseDistribution + if str, distribution family can be one of the following 'gaussian' | 'binomial' | 'poisson' | 'softplus' | 'probit' | 'gamma' | 'neg-binomial' default: 'poisson'. @@ -604,8 +623,7 @@ def __init__(self, distr='poisson', alpha=0.5, _check_params(distr=distr, max_iter=max_iter, fit_intercept=fit_intercept) - - self.distr = distr + self.distr = _get_distr(distr) self.alpha = alpha self.reg_lambda = reg_lambda self.Tau = Tau @@ -770,8 +788,11 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): xk = X[:, k] # Calculate grad and hess of log likelihood term - gk, hk = _gradhess_logloss_1d(self.distr, xk, y, z, self.eta, - self.theta, fit_intercept) + # gk, hk = _gradhess_logloss_1d(self.distr, xk, y, z, self.eta, + # self.theta, fit_intercept) + gk, hk = self.distr.gradhess_log_likelihood_1d(xk, y, z) + gk = 1. / n_samples * gk + hk = 1. / n_samples * hk # Add grad and hess of regularization term if self.Tau is None: @@ -998,8 +1019,11 @@ def predict(self, X): raise ValueError('Input data should be of type ndarray (got %s).' % type(X)) - yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, - fit_intercept=True) + # yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, + # fit_intercept=True) + + z = self.distr._z(self.beta0_, self.beta_, X) + yhat = self.distr.mu(z) if self.distr == 'binomial': yhat = (yhat > 0.5).astype(int) @@ -1028,8 +1052,10 @@ def _predict_proba(self, X): raise ValueError('Input data should be of type ndarray (got %s).' % type(X)) - yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, - fit_intercept=True) + # yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, + # fit_intercept=True) + z = self.distr._z(self.beta0_, self.beta_, X) + yhat = self.distr.mu(z) yhat = np.asarray(yhat) return yhat diff --git a/pyglmnet/utils.py b/pyglmnet/utils.py index ce8bc864..0f5c6b7d 100644 --- a/pyglmnet/utils.py +++ b/pyglmnet/utils.py @@ -7,6 +7,7 @@ import logging from scipy.special import log1p +from .distributions import BaseDistribution logger = logging.getLogger('pyglmnet') logger.addHandler(logging.StreamHandler()) @@ -113,9 +114,12 @@ def tikhonov_from_prior(prior_cov, n_samples, threshold=0.0001): def _check_params(distr, max_iter, fit_intercept): from .pyglmnet import ALLOWED_DISTRS - if distr not in ALLOWED_DISTRS: - raise ValueError('distr must be one of %s, Got ' - '%s' % (', '.join(ALLOWED_DISTRS), distr)) + err_msg = ('distr must be one of %s or a subclass of BaseDistribution. 
' + 'Got %s' % (', '.join(ALLOWED_DISTRS), distr)) + if isinstance(distr, str) and distr not in ALLOWED_DISTRS: + raise ValueError(err_msg) + if not isinstance(distr, str) and not isinstance(distr, BaseDistribution): + raise ValueError(err_msg) if not isinstance(max_iter, int): raise ValueError('max_iter must be of type int') From 5cf1a01e73fdc025725b289864d65cdf8caaf968 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Mon, 17 Aug 2020 23:56:25 -0700 Subject: [PATCH 02/24] add Poisson class --- pyglmnet/distributions.py | 58 ++++++++++++++++++++++++++++++++++++++- pyglmnet/metrics.py | 16 +++++++---- pyglmnet/pyglmnet.py | 10 +++++-- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 8a96c96e..9e9f50d4 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -42,7 +42,7 @@ def _z(self, beta0, beta, X): class Gaussian(BaseDistribution): - """Base class for distributions.""" + """Class for Gaussian distribution.""" def __init__(self): """init.""" @@ -78,3 +78,59 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): gk = np.sum((z - y) * xk) hk = np.sum(xk * xk) return gk, hk + + +class Poisson(BaseDistribution): + """Class for Poisson distribution.""" + + def __init__(self): + """init.""" + self.eta = None + + def mu(self, z): + """Inverse link function.""" + mu = z.copy() + mu0 = (1 - self.eta) * np.exp(self.eta) + mu[z > self.eta] = z[z > self.eta] * np.exp(self.eta) + mu0 + mu[z <= self.eta] = np.exp(z[z <= self.eta]) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = z.copy() + grad_mu[z > self.eta] = \ + np.ones_like(z)[z > self.eta] * np.exp(self.eta) + grad_mu[z <= self.eta] = np.exp(z[z <= self.eta]) + return grad_mu + + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + eps = np.spacing(1) + log_likelihood = np.sum(y * np.log(y_hat + eps) - y_hat) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) + grad_beta = ((np.dot(grad_mu.T, X) - + np.dot((y * grad_mu / mu).T, X)).T) + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + """One-dimensional Gradient and Hessian of log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + s = expit(z) + gk = np.sum((mu[z <= self.eta] - y[z <= self.eta]) * + xk[z <= self.eta]) + \ + np.exp(self.eta) * \ + np.sum((1 - y[z > self.eta] / mu[z > self.eta]) * + xk[z > self.eta]) + hk = np.sum(mu[z <= self.eta] * xk[z <= self.eta] ** 2) + \ + np.exp(self.eta) ** 2 * \ + np.sum(y[z > self.eta] / (mu[z > self.eta] ** 2) * + (xk[z > self.eta] ** 2)) + return gk, hk diff --git a/pyglmnet/metrics.py b/pyglmnet/metrics.py index 60225b9f..332296d1 100644 --- a/pyglmnet/metrics.py +++ b/pyglmnet/metrics.py @@ -2,6 +2,7 @@ import numpy as np from .pyglmnet import _logL +from .distributions import Poisson def deviance(y, yhat, distr, theta): @@ -23,8 +24,10 @@ def deviance(y, yhat, distr, theta): score : float Deviance of the predicted labels. 
""" - if distr in ['softplus', 'poisson', 'neg-binomial']: - LS = _logL(distr, y, y, theta=theta) + # if distr in ['softplus', 'poisson', 'neg-binomial']: + # LS = _logL(distr, y, y, theta=theta) + if isinstance(distr, Poisson): + LS = distr.log_likelihood(y, y) else: LS = 0 @@ -56,8 +59,10 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): score : float Pseudo-R2 score. """ - if distr in ['softplus', 'poisson', 'neg-binomial']: - LS = _logL(distr, y, y, theta=theta) + # if distr in ['softplus', 'poisson', 'neg-binomial']: + # LS = _logL(distr, y, y, theta=theta) + if isinstance(distr, Poisson): + LS = distr.log_likelihood(y, y) else: LS = 0 @@ -66,7 +71,8 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): L0 = distr.log_likelihood(y, ynull_) L1 = distr.log_likelihood(y, yhat) - if distr in ['softplus', 'poisson', 'neg-binomial']: + # if distr in ['softplus', 'poisson', 'neg-binomial']: + if isinstance(distr, Poisson): score = (1 - (LS - L1) / (LS - L0)) else: score = (1 - L1 / L0) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 5a18950a..0d34c3ca 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -14,7 +14,7 @@ from .externals.sklearn.utils import check_random_state, check_array, check_X_y from .externals.sklearn.utils.validation import check_is_fitted -from .distributions import BaseDistribution, Gaussian +from .distributions import BaseDistribution, Gaussian, Poisson ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] @@ -24,9 +24,12 @@ def _get_distr(distr): if isinstance(distr, BaseDistribution): return distr - elif distr == "gaussian": + elif distr == 'gaussian': distr = Gaussian() + elif distr == 'poisson': + distr = Poisson() + return distr @@ -624,6 +627,9 @@ def __init__(self, distr='poisson', alpha=0.5, _check_params(distr=distr, max_iter=max_iter, fit_intercept=fit_intercept) self.distr = _get_distr(distr) + if isinstance(self.distr, Poisson): + self.distr.eta = eta + print(self.distr.eta) self.alpha = alpha self.reg_lambda = reg_lambda self.Tau = Tau From 5592298fbeb34206875b6f767242b61232164a26 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 00:15:53 -0700 Subject: [PATCH 03/24] add PoissonSoftplus class --- pyglmnet/distributions.py | 57 +++++++++++++++++++++++++++++++++++++++ pyglmnet/metrics.py | 8 +++--- pyglmnet/pyglmnet.py | 9 ++++--- pyglmnet/utils.py | 11 -------- 4 files changed, 67 insertions(+), 18 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 9e9f50d4..8ac5e974 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -2,6 +2,17 @@ from abc import ABC, abstractmethod import numpy as np +from scipy.special import expit, log1p + + +def softplus(z): + """Numerically stable version of log(1 + exp(z)).""" + # see stabilizing softplus: http://sachinashanbhag.blogspot.com/2014/05/numerically-approximation-of-log-1-expy.html # noqa + mu = z.copy() + mu[z > 35] = z[z > 35] + mu[z < -10] = np.exp(z[z < -10]) + mu[(z >= -10) & (z <= 35)] = log1p(np.exp(z[(z >= -10) & (z <= 35)])) + return mu class BaseDistribution(ABC): @@ -134,3 +145,49 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): np.sum(y[z > self.eta] / (mu[z > self.eta] ** 2) * (xk[z > self.eta] ** 2)) return gk, hk + + +class PoissonSoftplus(BaseDistribution): + """Class for Poisson distribution with softplus inverse link.""" + + def __init__(self): + """init.""" + pass + + def mu(self, z): + """Inverse link function.""" + mu = softplus(z) + return mu 
+ + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = expit(z) + return grad_mu + + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + eps = np.spacing(1) + log_likelihood = np.sum(y * np.log(y_hat + eps) - y_hat) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) + grad_beta = ((np.dot(grad_mu.T, X) - + np.dot((y * grad_mu / mu).T, X)).T) + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + """One-dimensional Gradient and Hessian of log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + s = expit(z) + gk = np.sum(s * xk) - np.sum(y * s / mu * xk) + + grad_s = s * (1 - s) + grad_s_by_mu = grad_s / mu - s / (mu ** 2) + hk = np.sum(grad_s * xk ** 2) - np.sum(y * grad_s_by_mu * xk ** 2) + return gk, hk diff --git a/pyglmnet/metrics.py b/pyglmnet/metrics.py index 332296d1..c164a289 100644 --- a/pyglmnet/metrics.py +++ b/pyglmnet/metrics.py @@ -2,7 +2,7 @@ import numpy as np from .pyglmnet import _logL -from .distributions import Poisson +from .distributions import Poisson, PoissonSoftplus def deviance(y, yhat, distr, theta): @@ -26,7 +26,7 @@ def deviance(y, yhat, distr, theta): """ # if distr in ['softplus', 'poisson', 'neg-binomial']: # LS = _logL(distr, y, y, theta=theta) - if isinstance(distr, Poisson): + if isinstance(distr, (Poisson, PoissonSoftplus)): LS = distr.log_likelihood(y, y) else: LS = 0 @@ -61,7 +61,7 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): """ # if distr in ['softplus', 'poisson', 'neg-binomial']: # LS = _logL(distr, y, y, theta=theta) - if isinstance(distr, Poisson): + if isinstance(distr, (Poisson, PoissonSoftplus)): LS = distr.log_likelihood(y, y) else: LS = 0 @@ -72,7 +72,7 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): L1 = distr.log_likelihood(y, yhat) # if distr in ['softplus', 'poisson', 'neg-binomial']: - if isinstance(distr, Poisson): + if isinstance(distr, (Poisson, PoissonSoftplus)): score = (1 - (LS - L1) / (LS - L0)) else: score = (1 - L1 / L0) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 0d34c3ca..18b93bbf 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -8,13 +8,14 @@ from scipy.stats import norm from .utils import logger, set_log_level, _check_params, \ - _verbose_iterable, _tqdm_log, softplus + _verbose_iterable, _tqdm_log from .base import BaseEstimator, is_classifier, check_version from .externals.sklearn.utils import check_random_state, check_array, check_X_y from .externals.sklearn.utils.validation import check_is_fitted -from .distributions import BaseDistribution, Gaussian, Poisson +from .distributions import BaseDistribution, Gaussian, Poisson, \ + PoissonSoftplus, softplus ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] @@ -30,6 +31,9 @@ def _get_distr(distr): elif distr == 'poisson': distr = Poisson() + elif distr == 'softplus': + distr = PoissonSoftplus() + return distr @@ -629,7 +633,6 @@ def __init__(self, distr='poisson', alpha=0.5, self.distr = _get_distr(distr) if isinstance(self.distr, Poisson): self.distr.eta = eta - print(self.distr.eta) self.alpha = alpha self.reg_lambda = reg_lambda self.Tau = Tau diff --git a/pyglmnet/utils.py b/pyglmnet/utils.py index 0f5c6b7d..2940e151 100644 --- a/pyglmnet/utils.py +++ b/pyglmnet/utils.py @@ -5,7 
+5,6 @@ import numpy as np from copy import copy import logging -from scipy.special import log1p from .distributions import BaseDistribution @@ -22,16 +21,6 @@ has_tqdm = False -def softplus(z): - """Numerically stable version of log(1 + exp(z)).""" - # see stabilizing softplus: http://sachinashanbhag.blogspot.com/2014/05/numerically-approximation-of-log-1-expy.html # noqa - mu = z.copy() - mu[z > 35] = z[z > 35] - mu[z < -10] = np.exp(z[z < -10]) - mu[(z >= -10) & (z <= 35)] = log1p(np.exp(z[(z >= -10) & (z <= 35)])) - return mu - - def softmax(w): """Softmax function of given array of number w. From f53e5f39e6b2e14df2e046053da7f4446d8f73ea Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 00:28:07 -0700 Subject: [PATCH 04/24] add NegBinomialSoftplus class --- pyglmnet/distributions.py | 56 ++++++++++++++++++++++++++++++++++++++- pyglmnet/metrics.py | 8 +++--- pyglmnet/pyglmnet.py | 7 ++++- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 8ac5e974..91da2fa5 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod import numpy as np -from scipy.special import expit, log1p +from scipy.special import expit, log1p, loggamma def softplus(z): @@ -191,3 +191,57 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): grad_s_by_mu = grad_s / mu - s / (mu ** 2) hk = np.sum(grad_s * xk ** 2) - np.sum(y * grad_s_by_mu * xk ** 2) return gk, hk + + +class NegBinomialSoftplus(BaseDistribution): + """Class for Negative binomial distribution with softplus inverse link.""" + + def __init__(self): + """init.""" + self.theta = None + + def mu(self, z): + """Inverse link function.""" + mu = softplus(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = expit(z) + return grad_mu + + def log_likelihood(self, y, y_hat): + """Log L2-penalized likelihood.""" + theta = self.theta + log_likelihood = \ + np.sum(loggamma(y + theta) - loggamma(theta) - loggamma(y + 1) + + theta * np.log(theta) + y * np.log(y_hat) - (theta + y) * + np.log(y_hat + theta)) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + theta = self.theta + partial_beta_0 = grad_mu * ((theta + y) / (mu + theta) - y / mu) + grad_beta0 = np.sum(partial_beta_0) + grad_beta = np.dot(partial_beta_0.T, X) + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + """One-dimensional Gradient and Hessian of log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = expit(z) + hess_mu = np.exp(-z) * expit(z) ** 2 + + gradient_beta_j = -grad_mu * (y / mu - (y + theta) / (mu + theta)) + partial_beta_0_1 = hess_mu * (y / mu - (y + theta) / (mu + theta)) + partial_beta_0_2 = grad_mu ** 2 * \ + ((y + theta) / (mu + theta) ** 2 - y / mu ** 2) + partial_beta_0 = -(partial_beta_0_1 + partial_beta_0_2) + gk = np.dot(gradient_beta_j.T, xk) + hk = np.dot(partial_beta_0.T, xk ** 2) + return gk, hk diff --git a/pyglmnet/metrics.py b/pyglmnet/metrics.py index c164a289..28c018cc 100644 --- a/pyglmnet/metrics.py +++ b/pyglmnet/metrics.py @@ -2,7 +2,7 @@ import numpy as np from .pyglmnet import _logL -from .distributions import Poisson, PoissonSoftplus +from .distributions import Poisson, PoissonSoftplus, NegBinomialSoftplus def deviance(y, yhat, distr, theta): @@ -26,7 +26,7 @@ 
def deviance(y, yhat, distr, theta): """ # if distr in ['softplus', 'poisson', 'neg-binomial']: # LS = _logL(distr, y, y, theta=theta) - if isinstance(distr, (Poisson, PoissonSoftplus)): + if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): LS = distr.log_likelihood(y, y) else: LS = 0 @@ -61,7 +61,7 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): """ # if distr in ['softplus', 'poisson', 'neg-binomial']: # LS = _logL(distr, y, y, theta=theta) - if isinstance(distr, (Poisson, PoissonSoftplus)): + if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): LS = distr.log_likelihood(y, y) else: LS = 0 @@ -72,7 +72,7 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): L1 = distr.log_likelihood(y, yhat) # if distr in ['softplus', 'poisson', 'neg-binomial']: - if isinstance(distr, (Poisson, PoissonSoftplus)): + if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): score = (1 - (LS - L1) / (LS - L0)) else: score = (1 - L1 / L0) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 18b93bbf..f0e6ad33 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -15,7 +15,7 @@ from .externals.sklearn.utils.validation import check_is_fitted from .distributions import BaseDistribution, Gaussian, Poisson, \ - PoissonSoftplus, softplus + PoissonSoftplus, NegBinomialSoftplus, softplus ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] @@ -34,6 +34,9 @@ def _get_distr(distr): elif distr == 'softplus': distr = PoissonSoftplus() + elif distr == 'neg-binomial': + distr = NegBinomialSoftplus() + return distr @@ -633,6 +636,8 @@ def __init__(self, distr='poisson', alpha=0.5, self.distr = _get_distr(distr) if isinstance(self.distr, Poisson): self.distr.eta = eta + if isinstance(self.distr, NegBinomialSoftplus): + self.distr.theta = theta self.alpha = alpha self.reg_lambda = reg_lambda self.Tau = Tau From a93165dafa64717998adf45068114180c2de3dff Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 01:01:13 -0700 Subject: [PATCH 05/24] add Binomial and Probit classes --- pyglmnet/distributions.py | 146 ++++++++++++++++++++++++++++++++++++++ pyglmnet/pyglmnet.py | 21 ++++-- 2 files changed, 161 insertions(+), 6 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 91da2fa5..91bb641f 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod import numpy as np from scipy.special import expit, log1p, loggamma +from scipy.stats import norm def softplus(z): @@ -245,3 +246,148 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): gk = np.dot(gradient_beta_j.T, xk) hk = np.dot(partial_beta_0.T, xk ** 2) return gk, hk + + +class Binomial(BaseDistribution): + """Class for binomial distribution.""" + + def __init__(self): + """init.""" + pass + + def mu(self, z): + """Inverse link function.""" + mu = expit(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = expit(z) * (1 - expit(z)) + return grad_mu + + def log_likelihood(self, y, y_hat, z=None): + """Log L2-penalized likelihood.""" + # prevents underflow + if z is not None: + log_likelihood = np.sum(y * z - np.log(1 + np.exp(z))) + # for scoring + else: + log_likelihood = \ + np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + 
grad_beta0 = np.sum(mu - y) + grad_beta = np.dot((mu - y).T, X).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + """One-dimensional Gradient and Hessian of log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + gk = np.sum((mu - y) * xk) + hk = np.sum(mu * (1.0 - mu) * xk * xk) + return gk, hk + + +class Probit(BaseDistribution): + """Class for probit distribution.""" + + def __init__(self): + """init.""" + pass + + def _probit_g1(self, z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) + res[np.abs(z) <= thresh] = np.log(cdfz[np.abs(z) <= thresh]) + res[z > thresh] = -pdfz[z > thresh] / z[z > thresh] + return res + + def _probit_g2(self, z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = pdfz[z < -thresh] / z[z < -thresh] + res[np.abs(z) <= thresh] = np.log(1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = np.log(pdfz[z > thresh] / z[z > thresh]) + return res + + def _probit_g3(self, z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = -z[z < -thresh] + res[np.abs(z) <= thresh] = \ + pdfz[np.abs(z) <= thresh] / cdfz[np.abs(z) <= thresh] + res[z > thresh] = pdfz[z > thresh] + return res + + def _probit_g4(self, z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = pdfz[z < -thresh] + res[np.abs(z) <= thresh] = \ + pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = z[z > thresh] + return res + + def _probit_g5(self, z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = 0 * z[z < -thresh] + res[np.abs(z) <= thresh] = \ + z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ + cdfz[np.abs(z) <= thresh] + (pdfz[np.abs(z) <= thresh] / + cdfz[np.abs(z) <= thresh]) ** 2 + res[z > thresh] = z[z > thresh] * pdfz[z > thresh] + pdfz[z > thresh] ** 2 + return res + + def _probit_g6(self, z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = \ + pdfz[z < -thresh] ** 2 - z[z < -thresh] * pdfz[z < -thresh] + res[np.abs(z) <= thresh] = \ + (pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh])) ** 2 - \ + z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ + (1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = 0 * z[z > thresh] + return res + + def mu(self, z): + """Inverse link function.""" + mu = norm.cdf(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = norm.pdf(z) + return grad_mu + + def log_likelihood(self, y, y_hat, z=None): + """Log L2-penalized likelihood.""" + if z is not None: + pdfz, cdfz = norm.pdf(z), norm.cdf(z) + log_likelihood = \ + np.sum(y * self._probit_g1(z, pdfz, cdfz) + + (1 - y) * self._probit_g2(z, pdfz, cdfz)) + else: + log_likelihood = \ + np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_beta0 = np.sum(mu - y) + grad_beta = np.dot((mu - y).T, X).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + """One-dimensional Gradient and Hessian of log likelihood.""" + z = self._z(beta0, beta, X) + pdfz = norm.pdf(z) + cdfz = norm.cdf(z) + gk = -np.sum((y * self._probit_g3(z, pdfz, cdfz) - + (1 - y) * self._probit_g4(z, pdfz, cdfz)) * xk) + hk = np.sum((y * self._probit_g5(z, pdfz, cdfz) + + (1 - y) * self._probit_g6(z, pdfz, cdfz)) * (xk * xk)) + return gk, hk diff 
--git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index f0e6ad33..6010d8be 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -15,7 +15,7 @@ from .externals.sklearn.utils.validation import check_is_fitted from .distributions import BaseDistribution, Gaussian, Poisson, \ - PoissonSoftplus, NegBinomialSoftplus, softplus + PoissonSoftplus, NegBinomialSoftplus, Binomial, Probit, softplus ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] @@ -37,6 +37,11 @@ def _get_distr(distr): elif distr == 'neg-binomial': distr = NegBinomialSoftplus() + elif distr == 'binomial': + distr = Binomial() + + elif distr == 'probit': + distr = Probit() return distr @@ -1039,7 +1044,7 @@ def predict(self, X): z = self.distr._z(self.beta0_, self.beta_, X) yhat = self.distr.mu(z) - if self.distr == 'binomial': + if isinstance(self.distr, Binomial): yhat = (yhat > 0.5).astype(int) yhat = np.asarray(yhat) return yhat @@ -1058,7 +1063,8 @@ def _predict_proba(self, X): The predicted targets of shape (n_samples,). """ - if self.distr not in ['binomial', 'probit']: + # if self.distr not in ['binomial', 'probit']: + if not isinstance(self.distr, (Binomial, Probit)): raise ValueError('This is only applicable for \ the binomial distribution.') @@ -1095,7 +1101,8 @@ def predict_proba(self, X): X = check_array(X, accept_sparse=False) check_is_fitted(self, 'is_fitted_') - if self.distr in ['binomial', 'probit']: + # if self.distr in ['binomial', 'probit']: + if isinstance(self.distr, (Binomial, Probit)): return self._predict_proba(X) else: warnings.warn('This is only applicable for \ @@ -1150,14 +1157,16 @@ def score(self, X, y): # For f1 as well if self.score_metric in ['accuracy']: - if self.distr not in ['binomial', 'multinomial']: + # if self.distr not in ['binomial', 'multinomial']: + if not isinstance(self.distr, (Binomial, Probit)): raise ValueError(self.score_metric + ' is only defined for binomial ' + 'or multinomial distributions') y = np.asarray(y).ravel() - if self.distr in ['binomial', 'probit'] and \ + # if self.distr in ['binomial', 'probit'] and \ + if isinstance(self.distr, (Binomial, Probit)) and \ self.score_metric != 'accuracy': yhat = self.predict_proba(X) else: From be79bdc024c7034fc3d76f0c33d11f9cd043268b Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 01:11:48 -0700 Subject: [PATCH 06/24] add GammaSoftplus class --- pyglmnet/distributions.py | 42 +++++++++++++++++++++++++++++++++++++++ pyglmnet/pyglmnet.py | 6 +++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 91bb641f..21fda8c8 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -391,3 +391,45 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): hk = np.sum((y * self._probit_g5(z, pdfz, cdfz) + (1 - y) * self._probit_g6(z, pdfz, cdfz)) * (xk * xk)) return gk, hk + + +class GammaSoftplus(BaseDistribution): + """Class for Gamma distribution with softplus inverse link.""" + + def __init__(self): + """init.""" + pass + + def mu(self, z): + """Inverse link function.""" + mu = softplus(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = expit(z) + return grad_mu + + def log_likelihood(self, y, y_hat, z=None): + """Log L2-penalized likelihood.""" + # see + # https://www.statistics.ma.tum.de/fileadmin/w00bdb/www/czado/lec8.pdf + nu = 1. 
# shape parameter, exponential for now + log_likelihood = np.sum(nu * (-y / y_hat - np.log(y_hat))) + return log_likelihood + + def grad_log_likelihood(self, X, y, beta0, beta): + """Gradient of L2-penalized log likelihood.""" + z = self._z(beta0, beta, X) + mu = self.mu(z) + grad_mu = self.grad_mu(z) + nu = 1. + grad_logl = (y / mu ** 2 - 1 / mu) * grad_mu + grad_beta0 = -nu * np.sum(grad_logl) + grad_beta = -nu * np.dot(grad_logl.T, X).T + return grad_beta0, grad_beta + + def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + """One-dimensional Gradient and Hessian of log likelihood.""" + raise NotImplementedError('cdfast is not implemented for Gamma ' + 'distribution') diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 6010d8be..6b005752 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -15,7 +15,8 @@ from .externals.sklearn.utils.validation import check_is_fitted from .distributions import BaseDistribution, Gaussian, Poisson, \ - PoissonSoftplus, NegBinomialSoftplus, Binomial, Probit, softplus + PoissonSoftplus, NegBinomialSoftplus, Binomial, Probit, GammaSoftplus, \ + softplus ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] @@ -42,6 +43,9 @@ def _get_distr(distr): elif distr == 'probit': distr = Probit() + + elif distr == 'gamma': + distr = GammaSoftplus() return distr From 9e43a027db7c4c36a4eaf2aba85b4777c7bad367 Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Tue, 18 Aug 2020 14:59:05 -0400 Subject: [PATCH 07/24] TST add test for distributions --- pyglmnet/utils.py | 4 +-- tests/test_distributions.py | 71 +++++++++++++++++++++++++++++++++++++ tests/test_pyglmnet.py | 33 +---------------- 3 files changed, 74 insertions(+), 34 deletions(-) create mode 100644 tests/test_distributions.py diff --git a/pyglmnet/utils.py b/pyglmnet/utils.py index 2940e151..0622f441 100644 --- a/pyglmnet/utils.py +++ b/pyglmnet/utils.py @@ -106,9 +106,9 @@ def _check_params(distr, max_iter, fit_intercept): err_msg = ('distr must be one of %s or a subclass of BaseDistribution. 
' 'Got %s' % (', '.join(ALLOWED_DISTRS), distr)) if isinstance(distr, str) and distr not in ALLOWED_DISTRS: - raise ValueError(err_msg) + raise ValueError(err_msg) if not isinstance(distr, str) and not isinstance(distr, BaseDistribution): - raise ValueError(err_msg) + raise TypeError(err_msg) if not isinstance(max_iter, int): raise ValueError('max_iter must be of type int') diff --git a/tests/test_distributions.py b/tests/test_distributions.py new file mode 100644 index 00000000..8ab48929 --- /dev/null +++ b/tests/test_distributions.py @@ -0,0 +1,71 @@ +"""Tests for distributions.""" + +from functools import partial + +import pytest + +import numpy as np +from numpy.testing import assert_allclose + +import scipy.sparse as sps +from scipy.optimize import approx_fprime +from sklearn.preprocessing import StandardScaler + +from pyglmnet import ALLOWED_DISTRS, simulate_glm, GLM, _grad_L2loss, _L2loss +from pyglmnet.distributions import BaseDistribution + + +def test_base_distribution(): + """Test the base distribution.""" + class TestDistr(BaseDistribution): + def mu(): + pass + + def grad_mu(): + pass + + class TestDistr2(TestDistr): + def log_likelihood(): + pass + + with pytest.raises(TypeError, match='abstract methods log_likelihood'): + distr = TestDistr() + + msg = 'Gradients of log likelihood are not specified' + with pytest.raises(NotImplementedError, match=msg): + distr = TestDistr2() + distr.grad_log_likelihood() + + msg = 'distr must be one of' + with pytest.raises(ValueError, match=msg): + GLM(distr='blah') + with pytest.raises(TypeError, match=msg): + GLM(distr=['gaussian']) + +@pytest.mark.parametrize("distr", ALLOWED_DISTRS) +def test_gradients(distr): + """Test gradient accuracy.""" + # data + scaler = StandardScaler() + n_samples, n_features = 1000, 100 + X = np.random.normal(0.0, 1.0, [n_samples, n_features]) + X = scaler.fit_transform(X) + + density = 0.1 + beta_ = np.zeros(n_features + 1) + beta_[0] = np.random.rand() + beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0] + + reg_lambda = 0.1 + + glm = GLM(distr=distr, reg_lambda=reg_lambda) + y = simulate_glm(glm.distr, beta_[0], beta_[1:], X) + + func = partial(_L2loss, distr, glm.alpha, + glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group) + grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau, + reg_lambda, X, y, + glm.eta, glm.theta) + approx_grad = approx_fprime(beta_, func, 1.5e-8) + analytical_grad = grad(beta_) + assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3) diff --git a/tests/test_pyglmnet.py b/tests/test_pyglmnet.py index b9401a63..7acc02f4 100644 --- a/tests/test_pyglmnet.py +++ b/tests/test_pyglmnet.py @@ -3,14 +3,12 @@ import subprocess import os.path as op -from functools import partial import pytest import numpy as np from numpy.testing import assert_allclose, assert_array_equal from pytest import raises import scipy.sparse as sps -from scipy.optimize import approx_fprime from sklearn.datasets import make_regression from sklearn.preprocessing import StandardScaler @@ -18,7 +16,7 @@ from sklearn.linear_model import ElasticNet from sklearn.utils.estimator_checks import check_estimator -from pyglmnet import (GLM, GLMCV, _grad_L2loss, _L2loss, simulate_glm, +from pyglmnet import (GLM, GLMCV, simulate_glm, _gradhess_logloss_1d, _loss, datasets, ALLOWED_DISTRS) matplotlib.use('agg') @@ -29,35 +27,6 @@ def test_glm_estimator(): check_estimator(GLM) -@pytest.mark.parametrize("distr", ALLOWED_DISTRS) -def test_gradients(distr): - """Test gradient accuracy.""" - # data - 
scaler = StandardScaler()
-    n_samples, n_features = 1000, 100
-    X = np.random.normal(0.0, 1.0, [n_samples, n_features])
-    X = scaler.fit_transform(X)
-
-    density = 0.1
-    beta_ = np.zeros(n_features + 1)
-    beta_[0] = np.random.rand()
-    beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0]
-
-    reg_lambda = 0.1
-
-    glm = GLM(distr=distr, reg_lambda=reg_lambda)
-    y = simulate_glm(glm.distr, beta_[0], beta_[1:], X)
-
-    func = partial(_L2loss, distr, glm.alpha,
-                   glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group)
-    grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau,
-                   reg_lambda, X, y,
-                   glm.eta, glm.theta)
-    approx_grad = approx_fprime(beta_, func, 1.5e-8)
-    analytical_grad = grad(beta_)
-    assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3)
-
-
 def test_tikhonov():
     """Tikhonov regularization test."""
     n_samples, n_features = 100, 10

From 381829119dbdbac0fa359424133666cb574969d4 Mon Sep 17 00:00:00 2001
From: Mainak Jas
Date: Tue, 18 Aug 2020 15:56:44 -0400
Subject: [PATCH 08/24] DOC: add example of cloglog

---
 examples/plot_custom_distributions.py | 76 +++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 examples/plot_custom_distributions.py

diff --git a/examples/plot_custom_distributions.py b/examples/plot_custom_distributions.py
new file mode 100644
index 00000000..edab74ff
--- /dev/null
+++ b/examples/plot_custom_distributions.py
@@ -0,0 +1,76 @@
+"""
+======================
+Rolling out custom GLM
+======================
+
+This example demonstrates how to roll out your own custom
+GLM class using Pyglmnet.
+"""
+########################################################
+
+# Author: Pavan Ramkumar
+# License: MIT
+
+from sklearn.model_selection import train_test_split
+from pyglmnet import GLMCV, datasets
+
+########################################################
+# Download and preprocess data files

+X, y = datasets.fetch_community_crime_data()
+n_samples, n_features = X.shape
+
+########################################################
+# Split the data into training and test sets
+
+X_train, X_test, y_train, y_test = \
+    train_test_split(X, y, test_size=0.33, random_state=0)
+
+########################################################
+# Now we define our own distribution class. This must
+# inherit from BaseDistribution. The BaseDistribution
+# class requires defining the following methods:
+# - mu: inverse link function
+# - grad_mu: gradient of the inverse link function
+# - log_likelihood: the log likelihood function
+# - grad_log_likelihood: the gradient of the log
+#   likelihood (used by the batch gradient solver).
+# All distributions in pyglmnet inherit from BaseDistribution.
+#
+# This is really powerful. For instance, we can start from
+# the existing Binomial distribution and override mu and grad_mu
+# if we want to use the cloglog link function.
+
+import numpy as np
+from pyglmnet.distributions import Binomial
+
+
+class CustomBinomial(Binomial):
+    """Custom binomial distribution."""
+
+    def mu(self, z):
+        """cloglog inverse link"""
+        mu = 1 - np.exp(-np.exp(z))
+        return mu
+
+    def grad_mu(self, z):
+        """Gradient of inverse link."""
+        grad_mu = np.exp(z - np.exp(z))
+        return grad_mu
+
+
+distr = CustomBinomial()
+
+########################################################
+# Now we pass it to the GLMCV class just as before.
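+
+########################################################
+# A quick sanity check of the link above (an illustrative sketch; the
+# names z_check and eps are assumed here, not taken from the patch):
+# grad_mu should match a central finite difference of mu.
+
+z_check = np.linspace(-3, 3, 7)
+eps = 1e-6
+num_grad = (distr.mu(z_check + eps) - distr.mu(z_check - eps)) / (2 * eps)
+# d/dz [1 - exp(-exp(z))] = exp(z) * exp(-exp(z)) = exp(z - exp(z))
+assert np.allclose(num_grad, distr.grad_mu(z_check), atol=1e-6)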
+ +# use the default value for reg_lambda +glm = GLMCV(distr=distr, alpha=0.05, score_metric='pseudo_R2', cv=3, + tol=1e-4) + +# fit model +glm.fit(X_train, y_train) + +# score the test set prediction +y_test_hat = glm.predict_proba(X_test) +print("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test)) From f6536d1cf1c247aab8ca045c152d8b3614c31b0f Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Tue, 18 Aug 2020 16:37:04 -0400 Subject: [PATCH 09/24] Simplify Poisson --- pyglmnet/distributions.py | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 21fda8c8..598c5301 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -148,7 +148,7 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): return gk, hk -class PoissonSoftplus(BaseDistribution): +class PoissonSoftplus(Poisson): """Class for Poisson distribution with softplus inverse link.""" def __init__(self): @@ -165,34 +165,6 @@ def grad_mu(self, z): grad_mu = expit(z) return grad_mu - def log_likelihood(self, y, y_hat): - """Log L2-penalized likelihood.""" - eps = np.spacing(1) - log_likelihood = np.sum(y * np.log(y_hat + eps) - y_hat) - return log_likelihood - - def grad_log_likelihood(self, X, y, beta0, beta): - """Gradient of L2-penalized log likelihood.""" - z = self._z(beta0, beta, X) - mu = self.mu(z) - grad_mu = self.grad_mu(z) - grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) - grad_beta = ((np.dot(grad_mu.T, X) - - np.dot((y * grad_mu / mu).T, X)).T) - return grad_beta0, grad_beta - - def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): - """One-dimensional Gradient and Hessian of log likelihood.""" - z = self._z(beta0, beta, X) - mu = self.mu(z) - s = expit(z) - gk = np.sum(s * xk) - np.sum(y * s / mu * xk) - - grad_s = s * (1 - s) - grad_s_by_mu = grad_s / mu - s / (mu ** 2) - hk = np.sum(grad_s * xk ** 2) - np.sum(y * grad_s_by_mu * xk ** 2) - return gk, hk - class NegBinomialSoftplus(BaseDistribution): """Class for Negative binomial distribution with softplus inverse link.""" From 63ee2ca335dbfea32360bb32841af9963a81e5a9 Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Tue, 18 Aug 2020 16:41:47 -0400 Subject: [PATCH 10/24] No need of init --- pyglmnet/distributions.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 598c5301..2ac636a5 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -56,10 +56,6 @@ def _z(self, beta0, beta, X): class Gaussian(BaseDistribution): """Class for Gaussian distribution.""" - def __init__(self): - """init.""" - pass - def mu(self, z): """Inverse link function.""" mu = z @@ -151,10 +147,6 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): class PoissonSoftplus(Poisson): """Class for Poisson distribution with softplus inverse link.""" - def __init__(self): - """init.""" - pass - def mu(self, z): """Inverse link function.""" mu = softplus(z) @@ -223,10 +215,6 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): class Binomial(BaseDistribution): """Class for binomial distribution.""" - def __init__(self): - """init.""" - pass - def mu(self, z): """Inverse link function.""" mu = expit(z) @@ -268,10 +256,6 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): class Probit(BaseDistribution): """Class for probit distribution.""" - def __init__(self): - """init.""" - pass - def _probit_g1(self, z, pdfz, cdfz, thresh=5): res = 
np.zeros_like(z) res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) @@ -368,10 +352,6 @@ def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): class GammaSoftplus(BaseDistribution): """Class for Gamma distribution with softplus inverse link.""" - def __init__(self): - """init.""" - pass - def mu(self, z): """Inverse link function.""" mu = softplus(z) From 4cfe839e055df3444cf2e4e17280cc159063f9c4 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 20:21:36 -0700 Subject: [PATCH 11/24] revert API of gradhess_log_likelihood_1d to accept z --- pyglmnet/distributions.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 2ac636a5..121bda81 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -80,9 +80,8 @@ def grad_log_likelihood(self, X, y, beta0, beta): grad_beta = np.dot((mu - y).T, X * grad_mu[:, None]).T return grad_beta0, grad_beta - def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" - z = self._z(beta0, beta, X) gk = np.sum((z - y) * xk) hk = np.sum(xk * xk) return gk, hk @@ -127,9 +126,8 @@ def grad_log_likelihood(self, X, y, beta0, beta): np.dot((y * grad_mu / mu).T, X)).T) return grad_beta0, grad_beta - def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" - z = self._z(beta0, beta, X) mu = self.mu(z) s = expit(z) gk = np.sum((mu[z <= self.eta] - y[z <= self.eta]) * @@ -195,13 +193,12 @@ def grad_log_likelihood(self, X, y, beta0, beta): grad_beta = np.dot(partial_beta_0.T, X) return grad_beta0, grad_beta - def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" - z = self._z(beta0, beta, X) mu = self.mu(z) grad_mu = expit(z) hess_mu = np.exp(-z) * expit(z) ** 2 - + theta = self.theta gradient_beta_j = -grad_mu * (y / mu - (y + theta) / (mu + theta)) partial_beta_0_1 = hess_mu * (y / mu - (y + theta) / (mu + theta)) partial_beta_0_2 = grad_mu ** 2 * \ @@ -244,9 +241,8 @@ def grad_log_likelihood(self, X, y, beta0, beta): grad_beta = np.dot((mu - y).T, X).T return grad_beta0, grad_beta - def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" - z = self._z(beta0, beta, X) mu = self.mu(z) gk = np.sum((mu - y) * xk) hk = np.sum(mu * (1.0 - mu) * xk * xk) @@ -337,9 +333,8 @@ def grad_log_likelihood(self, X, y, beta0, beta): grad_beta = np.dot((mu - y).T, X).T return grad_beta0, grad_beta - def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" - z = self._z(beta0, beta, X) pdfz = norm.pdf(z) cdfz = norm.cdf(z) gk = -np.sum((y * self._probit_g3(z, pdfz, cdfz) - @@ -381,7 +376,7 @@ def grad_log_likelihood(self, X, y, beta0, beta): grad_beta = -nu * np.dot(grad_logl.T, X).T return grad_beta0, grad_beta - def gradhess_log_likelihood_1d(self, xk, y, beta0, beta): + def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" raise NotImplementedError('cdfast is not implemented for Gamma ' 'distribution') From 
6a8b7f403dd17951895dc96f41a4e46f1ac55f0a Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 20:21:59 -0700 Subject: [PATCH 12/24] make most tests pass --- pyglmnet/pyglmnet.py | 55 ++++++++++++++++++++++--------------- tests/test_distributions.py | 3 +- tests/test_pyglmnet.py | 10 +++---- 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 6b005752..55425a65 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -465,9 +465,19 @@ def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, y: array simulated target data of shape (n_samples,) """ - if distr not in ALLOWED_DISTRS: - raise ValueError("'distr' must be in %s, got %s" - % (repr(ALLOWED_DISTRS), distr)) + err_msg = ('distr must be one of %s or a subclass of BaseDistribution. ' + 'Got %s' % (', '.join(ALLOWED_DISTRS), distr)) + if isinstance(distr, str) and distr not in ALLOWED_DISTRS: + raise ValueError(err_msg) + if not isinstance(distr, str) and not isinstance(distr, BaseDistribution): + raise TypeError(err_msg) + + if isinstance(distr, str): + distr = _get_distr(distr) + if isinstance(distr, Poisson): + distr.eta = eta + if isinstance(distr, NegBinomialSoftplus): + distr.theta = theta if not isinstance(beta0, float): raise ValueError("'beta0' must be float, got %s" % type(beta0)) @@ -475,28 +485,28 @@ def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, if beta.ndim != 1: raise ValueError("'beta' must be 1D, got %dD" % beta.ndim) + if not fit_intercept: + beta0 = 0. + + mu = distr.mu(distr._z(beta0, beta, X)) + if not sample: - return _lmb(distr, beta0, beta, X, eta, fit_intercept=fit_intercept) + return mu _random_state = check_random_state(random_state) - if distr == 'softplus' or distr == 'poisson': - y = _random_state.poisson( - _lmb(distr, beta0, beta, X, eta, - fit_intercept=fit_intercept)) - if distr == 'gaussian': - y = _random_state.normal( - _lmb(distr, beta0, beta, X, eta, - fit_intercept=fit_intercept)) - if distr == 'binomial' or distr == 'probit': - y = _random_state.binomial( - 1, - _lmb(distr, beta0, beta, X, eta, - fit_intercept=fit_intercept)) - if distr == 'gamma': - mu = _lmb(distr, beta0, beta, X, eta, fit_intercept=fit_intercept) + if isinstance(distr, (PoissonSoftplus, Poisson)): + y = _random_state.poisson(mu) + + if isinstance(distr, Gaussian): + y = _random_state.normal(mu) + + if isinstance(distr, (Binomial, Probit)): + y = _random_state.binomial(1, mu) + + if isinstance(distr, GammaSoftplus): y = np.exp(mu) - if distr == 'neg-binomial': - mu = _lmb(distr, beta0, beta, X, eta, fit_intercept=fit_intercept) + + if isinstance(distr, NegBinomialSoftplus): p = theta / (theta + mu) # Probability of success y = _random_state.negative_binomial(theta, p) return y @@ -838,7 +848,8 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): update = 1. / hk * gk if hk > 1. 
else self.learning_rate * gk # Update parameters, z - beta[k], z = beta[k] - update, z - update * xk + # beta[k], z = beta[k] - update, z - update * xk + beta[k] = beta[k] - update return beta def fit(self, X, y): diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 8ab48929..e14dc0ce 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -42,6 +42,7 @@ def log_likelihood(): with pytest.raises(TypeError, match=msg): GLM(distr=['gaussian']) + @pytest.mark.parametrize("distr", ALLOWED_DISTRS) def test_gradients(distr): """Test gradient accuracy.""" @@ -63,7 +64,7 @@ def test_gradients(distr): func = partial(_L2loss, distr, glm.alpha, glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group) - grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau, + grad = partial(_grad_L2loss, glm.distr, glm.alpha, glm.Tau, reg_lambda, X, y, glm.eta, glm.theta) approx_grad = approx_fprime(beta_, func, 1.5e-8) diff --git a/tests/test_pyglmnet.py b/tests/test_pyglmnet.py index 7acc02f4..f7ad73b0 100644 --- a/tests/test_pyglmnet.py +++ b/tests/test_pyglmnet.py @@ -17,7 +17,7 @@ from sklearn.utils.estimator_checks import check_estimator from pyglmnet import (GLM, GLMCV, simulate_glm, - _gradhess_logloss_1d, _loss, datasets, ALLOWED_DISTRS) + _loss, datasets, ALLOWED_DISTRS) matplotlib.use('agg') @@ -343,7 +343,9 @@ def test_cdfast(distr): z = beta_[0] + np.dot(X, beta_[1:]) k = 1 xk = X[:, k - 1] - gk, hk = _gradhess_logloss_1d(glm.distr, xk, y, z, glm.eta, glm.theta) + gk, hk = glm.distr.gradhess_log_likelihood_1d(xk, y, z) + gk = 1. / n_samples * gk + hk = 1. / n_samples * hk # test grad and hess if distr != 'multinomial': @@ -407,7 +409,6 @@ def test_random_state_consistency(): @pytest.mark.parametrize("distr", ALLOWED_DISTRS) def test_simulate_glm(distr): """Test that every generative model can be simulated from.""" - random_state = 1 state = np.random.RandomState(random_state) n_samples, n_features = 10, 3 @@ -427,13 +428,12 @@ def test_simulate_glm(distr): # If the distribution name is garbage it will fail distr = 'multivariate_gaussian_poisson' - with pytest.raises(ValueError, match="'distr' must be in"): + with pytest.raises(ValueError): simulate_glm(distr, 1.0, 1.0, np.array([[1.0]])) def test_api_input(): """Test that the input value of y can be of different types.""" - random_state = 1 state = np.random.RandomState(random_state) n_samples, n_features = 100, 5 From 051c1d93a871e775d4a8b1a374369ef50cb54922 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 21:54:05 -0700 Subject: [PATCH 13/24] re-add gradhess_log_likelihood_1d for softplus --- pyglmnet/distributions.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 121bda81..0e200a56 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -155,6 +155,17 @@ def grad_mu(self, z): grad_mu = expit(z) return grad_mu + def gradhess_log_likelihood_1d(self, xk, y, z): + """One-dimensional Gradient and Hessian of log likelihood.""" + mu = self.mu(z) + s = expit(z) + gk = np.sum(s * xk) - np.sum(y * s / mu * xk) + + grad_s = s * (1 - s) + grad_s_by_mu = grad_s / mu - s / (mu ** 2) + hk = np.sum(grad_s * xk ** 2) - np.sum(y * grad_s_by_mu * xk ** 2) + return gk, hk + class NegBinomialSoftplus(BaseDistribution): """Class for Negative binomial distribution with softplus inverse link.""" From e0c2f3107fe000f274d683bb66a016f5b902fc3e Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 21:54:31 
-0700 Subject: [PATCH 14/24] bug in cdfast update for z --- pyglmnet/pyglmnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 55425a65..c5edf0d6 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -848,8 +848,8 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): update = 1. / hk * gk if hk > 1. else self.learning_rate * gk # Update parameters, z - # beta[k], z = beta[k] - update, z - update * xk - beta[k] = beta[k] - update + beta[k], z = beta[k] - update, z - update * xk + return beta def fit(self, X, y): From e6e6d227982c16acfa53f58b5d444aa5ae2a57c4 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 21:54:57 -0700 Subject: [PATCH 15/24] style fix for test_compare_sklearn --- tests/test_pyglmnet.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_pyglmnet.py b/tests/test_pyglmnet.py index f7ad73b0..08c8afd4 100644 --- a/tests/test_pyglmnet.py +++ b/tests/test_pyglmnet.py @@ -285,7 +285,7 @@ def test_compare_sklearn(solver): def rmse(a, b): return np.sqrt(np.mean((a - b) ** 2)) - X, Y, coef_ = make_regression( + X, y, coef_ = make_regression( n_samples=1000, n_features=500, noise=0.1, n_informative=10, coef=True, random_state=42) @@ -294,18 +294,18 @@ def rmse(a, b): l1_ratio = 0.5 clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, tol=1e-5) - clf.fit(X, Y) + clf.fit(X, y) glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha, solver=solver, tol=1e-6, max_iter=500) - glm.fit(X, Y) + glm.fit(X, y) y_sk = clf.predict(X) y_pg = glm.predict(X) - assert abs(rmse(Y, y_sk) - rmse(Y, y_pg)) < 0.5 + assert abs(rmse(y, y_sk) - rmse(y, y_pg)) < 0.5 glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha, solver=solver, tol=1e-6, max_iter=5, fit_intercept=False) - glm.fit(X, Y) + glm.fit(X, y) assert glm.beta0_ == 0. 
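Note on PATCH 14 above: restoring the joint update of beta[k] and the cached linear predictor z is what makes _cdfast a true cyclic coordinate descent; with the z update dropped, every coordinate after the first evaluates its gradient against a stale predictor. A minimal standalone sketch of the pattern, assuming a hypothetical grad_hess_1d callable and a name (cd_sweep) that does not exist in pyglmnet:

    import numpy as np

    def cd_sweep(X, y, beta, grad_hess_1d, learning_rate=2e-1):
        """One cyclic coordinate-descent pass over all features."""
        z = np.dot(X, beta)  # cached linear predictor
        for k in range(beta.shape[0]):
            xk = X[:, k]
            gk, hk = grad_hess_1d(xk, y, z)
            update = 1. / hk * gk if hk > 1. else learning_rate * gk
            # The fix: move beta[k] and z together, so the next
            # coordinate's gradient is taken at the current beta.
            beta[k], z = beta[k] - update, z - update * xk
        return beta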
glm.predict(X) From 888e5b87a09a6d5a6c85c5ad079762d197db2efb Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 22:40:47 -0700 Subject: [PATCH 16/24] private method _set_distr derives self.distr_ object from self.distr str --- pyglmnet/pyglmnet.py | 95 +++++++++++++++---------------------- pyglmnet/utils.py | 4 +- tests/test_distributions.py | 5 +- tests/test_pyglmnet.py | 3 +- 4 files changed, 44 insertions(+), 63 deletions(-) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index c5edf0d6..50d9dee4 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -22,33 +22,6 @@ 'probit', 'gamma', 'neg-binomial'] -def _get_distr(distr): - if isinstance(distr, BaseDistribution): - return distr - - elif distr == 'gaussian': - distr = Gaussian() - - elif distr == 'poisson': - distr = Poisson() - - elif distr == 'softplus': - distr = PoissonSoftplus() - - elif distr == 'neg-binomial': - distr = NegBinomialSoftplus() - - elif distr == 'binomial': - distr = Binomial() - - elif distr == 'probit': - distr = Probit() - - elif distr == 'gamma': - distr = GammaSoftplus() - return distr - - def _probit_g1(z, pdfz, cdfz, thresh=5): res = np.zeros_like(z) res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) @@ -472,12 +445,8 @@ def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, if not isinstance(distr, str) and not isinstance(distr, BaseDistribution): raise TypeError(err_msg) - if isinstance(distr, str): - distr = _get_distr(distr) - if isinstance(distr, Poisson): - distr.eta = eta - if isinstance(distr, NegBinomialSoftplus): - distr.theta = theta + glm = GLM(distr=distr) + glm._set_distr() if not isinstance(beta0, float): raise ValueError("'beta0' must be float, got %s" % type(beta0)) @@ -488,7 +457,7 @@ def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, if not fit_intercept: beta0 = 0. - mu = distr.mu(distr._z(beta0, beta, X)) + mu = glm.distr_.mu(glm.distr_._z(beta0, beta, X)) if not sample: return mu @@ -652,11 +621,7 @@ def __init__(self, distr='poisson', alpha=0.5, _check_params(distr=distr, max_iter=max_iter, fit_intercept=fit_intercept) - self.distr = _get_distr(distr) - if isinstance(self.distr, Poisson): - self.distr.eta = eta - if isinstance(self.distr, NegBinomialSoftplus): - self.distr.theta = theta + self.distr = distr self.alpha = alpha self.reg_lambda = reg_lambda self.Tau = Tau @@ -674,9 +639,27 @@ def __init__(self, distr='poisson', alpha=0.5, self.theta = theta set_log_level(verbose) + def _set_distr(self): + distr_lookup = { + 'gaussian': Gaussian(), + 'poisson': Poisson(), + 'softplus': PoissonSoftplus(), + 'neg-binomial': NegBinomialSoftplus(), + 'binomial': Binomial(), + 'probit': Probit(), + 'gamma': GammaSoftplus() + } + self.distr_ = distr_lookup[self.distr] + if isinstance(self.distr_, Poisson): + self.distr_.eta = self.eta + if isinstance(self.distr_, NegBinomialSoftplus): + self.distr_.theta = self.theta + def _set_cv(cv, estimator=None, X=None, y=None): - """Set the default CV depending on whether clf - is classifier/regressor.""" + """Set the default CV. + + Depends on whether clf is classifier/regressor. 
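Note on PATCH 16 above: __init__ now stores the raw distr string untouched and the lookup is deferred to the private _set_distr, called at fit time. This keeps the estimator friendly to scikit-learn-style get_params/set_params cloning, since no constructor argument is transformed inside __init__, and it puts the resolved object in the conventionally named fitted attribute distr_. A reduced sketch of the pattern (SketchGLM is a stand-in, not pyglmnet's GLM; Gaussian and Poisson are the classes from this series):

    from pyglmnet.distributions import Gaussian, Poisson

    class SketchGLM:
        def __init__(self, distr='gaussian', eta=2.0):
            self.distr = distr  # keep the constructor argument as-is
            self.eta = eta

        def _set_distr(self):
            lookup = {'gaussian': Gaussian, 'poisson': Poisson}
            self.distr_ = lookup[self.distr]()  # trailing underscore: set at fit
            if isinstance(self.distr_, Poisson):
                self.distr_.eta = self.eta

        def fit(self, X, y):
            self._set_distr()  # resolve the string exactly once, at fit time
            return self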
+ """ # Detect whether classification or regression if estimator in ['classifier', 'regressor']: est_is_classifier = estimator == 'classifier' @@ -823,7 +806,7 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): # Calculate grad and hess of log likelihood term # gk, hk = _gradhess_logloss_1d(self.distr, xk, y, z, self.eta, # self.theta, fit_intercept) - gk, hk = self.distr.gradhess_log_likelihood_1d(xk, y, z) + gk, hk = self.distr_.gradhess_log_likelihood_1d(xk, y, z) gk = 1. / n_samples * gk hk = 1. / n_samples * hk @@ -849,7 +832,7 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): # Update parameters, z beta[k], z = beta[k] - update, z - update * xk - + return beta def fit(self, X, y): @@ -869,7 +852,7 @@ def fit(self, X, y): The fitted model. """ X, y = check_X_y(X, y, accept_sparse=False) - + self._set_distr() self.beta0_ = None self.beta_ = None self.ynull_ = None @@ -942,7 +925,7 @@ def fit(self, X, y): self.n_iter_ += 1 beta_old = beta.copy() if self.solver == 'batch-gradient': - grad = _grad_L2loss(self.distr, + grad = _grad_L2loss(self.distr_, alpha, self.Tau, reg_lambda, X, y, self.eta, self.theta, beta, self.fit_intercept) @@ -1056,10 +1039,10 @@ def predict(self, X): # yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, # fit_intercept=True) - z = self.distr._z(self.beta0_, self.beta_, X) - yhat = self.distr.mu(z) + z = self.distr_._z(self.beta0_, self.beta_, X) + yhat = self.distr_.mu(z) - if isinstance(self.distr, Binomial): + if isinstance(self.distr_, Binomial): yhat = (yhat > 0.5).astype(int) yhat = np.asarray(yhat) return yhat @@ -1079,7 +1062,7 @@ def _predict_proba(self, X): """ # if self.distr not in ['binomial', 'probit']: - if not isinstance(self.distr, (Binomial, Probit)): + if not isinstance(self.distr_, (Binomial, Probit)): raise ValueError('This is only applicable for \ the binomial distribution.') @@ -1089,8 +1072,8 @@ def _predict_proba(self, X): # yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, # fit_intercept=True) - z = self.distr._z(self.beta0_, self.beta_, X) - yhat = self.distr.mu(z) + z = self.distr_._z(self.beta0_, self.beta_, X) + yhat = self.distr_.mu(z) yhat = np.asarray(yhat) return yhat @@ -1117,7 +1100,7 @@ def predict_proba(self, X): check_is_fitted(self, 'is_fitted_') # if self.distr in ['binomial', 'probit']: - if isinstance(self.distr, (Binomial, Probit)): + if isinstance(self.distr_, (Binomial, Probit)): return self._predict_proba(X) else: warnings.warn('This is only applicable for \ @@ -1173,7 +1156,7 @@ def score(self, X, y): # For f1 as well if self.score_metric in ['accuracy']: # if self.distr not in ['binomial', 'multinomial']: - if not isinstance(self.distr, (Binomial, Probit)): + if not isinstance(self.distr_, (Binomial, Probit)): raise ValueError(self.score_metric + ' is only defined for binomial ' + 'or multinomial distributions') @@ -1181,7 +1164,7 @@ def score(self, X, y): y = np.asarray(y).ravel() # if self.distr in ['binomial', 'probit'] and \ - if isinstance(self.distr, (Binomial, Probit)) and \ + if isinstance(self.distr_, (Binomial, Probit)) and \ self.score_metric != 'accuracy': yhat = self.predict_proba(X) else: @@ -1189,10 +1172,10 @@ def score(self, X, y): # Check whether we have a list of estimators or a single estimator if self.score_metric == 'deviance': - return metrics.deviance(y, yhat, self.distr, self.theta) + return metrics.deviance(y, yhat, self.distr_, self.theta) elif self.score_metric == 'pseudo_R2': return metrics.pseudo_R2(X, y, yhat, self.ynull_, - 
self.distr, self.theta) + self.distr_, self.theta) if self.score_metric == 'accuracy': return metrics.accuracy(y, yhat) diff --git a/pyglmnet/utils.py b/pyglmnet/utils.py index 0622f441..82df0482 100644 --- a/pyglmnet/utils.py +++ b/pyglmnet/utils.py @@ -105,10 +105,8 @@ def _check_params(distr, max_iter, fit_intercept): err_msg = ('distr must be one of %s or a subclass of BaseDistribution. ' 'Got %s' % (', '.join(ALLOWED_DISTRS), distr)) - if isinstance(distr, str) and distr not in ALLOWED_DISTRS: + if distr not in ALLOWED_DISTRS: raise ValueError(err_msg) - if not isinstance(distr, str) and not isinstance(distr, BaseDistribution): - raise TypeError(err_msg) if not isinstance(max_iter, int): raise ValueError('max_iter must be of type int') diff --git a/tests/test_distributions.py b/tests/test_distributions.py index e14dc0ce..d9c0c1c6 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -39,8 +39,6 @@ def log_likelihood(): msg = 'distr must be one of' with pytest.raises(ValueError, match=msg): GLM(distr='blah') - with pytest.raises(TypeError, match=msg): - GLM(distr=['gaussian']) @pytest.mark.parametrize("distr", ALLOWED_DISTRS) @@ -60,11 +58,12 @@ def test_gradients(distr): reg_lambda = 0.1 glm = GLM(distr=distr, reg_lambda=reg_lambda) + glm._set_distr() y = simulate_glm(glm.distr, beta_[0], beta_[1:], X) func = partial(_L2loss, distr, glm.alpha, glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group) - grad = partial(_grad_L2loss, glm.distr, glm.alpha, glm.Tau, + grad = partial(_grad_L2loss, glm.distr_, glm.alpha, glm.Tau, reg_lambda, X, y, glm.eta, glm.theta) approx_grad = approx_fprime(beta_, func, 1.5e-8) diff --git a/tests/test_pyglmnet.py b/tests/test_pyglmnet.py index 08c8afd4..490a495b 100644 --- a/tests/test_pyglmnet.py +++ b/tests/test_pyglmnet.py @@ -325,6 +325,7 @@ def test_cdfast(distr): return glm = GLM(distr, solver='cdfast') + glm._set_distr() np.random.seed(glm.random_state) @@ -343,7 +344,7 @@ def test_cdfast(distr): z = beta_[0] + np.dot(X, beta_[1:]) k = 1 xk = X[:, k - 1] - gk, hk = glm.distr.gradhess_log_likelihood_1d(xk, y, z) + gk, hk = glm.distr_.gradhess_log_likelihood_1d(xk, y, z) gk = 1. / n_samples * gk hk = 1. / n_samples * hk From d33c6cadcce75cf58f80500e37b58717878f0ac5 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 23:22:47 -0700 Subject: [PATCH 17/24] remove legacy functions --- pyglmnet/__init__.py | 2 +- pyglmnet/distributions.py | 2 +- pyglmnet/metrics.py | 10 +- pyglmnet/pyglmnet.py | 321 +++--------------------------------- pyglmnet/utils.py | 5 +- tests/test_distributions.py | 2 +- tests/test_pyglmnet.py | 7 +- 7 files changed, 33 insertions(+), 316 deletions(-) diff --git a/pyglmnet/__init__.py b/pyglmnet/__init__.py index 89ae6e23..028a9030 100644 --- a/pyglmnet/__init__.py +++ b/pyglmnet/__init__.py @@ -19,7 +19,7 @@ __version__ = '1.2.dev0' -from .pyglmnet import GLM, GLMCV, _grad_L2loss, _L2loss, simulate_glm, _gradhess_logloss_1d, _loss, ALLOWED_DISTRS +from .pyglmnet import GLM, GLMCV, _grad_L2loss, _L2loss, simulate_glm, _loss, ALLOWED_DISTRS from .utils import softmax, label_binarizer, set_log_level from .datasets import fetch_tikhonov_data, fetch_rgc_spike_trains, fetch_community_crime_data, fetch_group_lasso_data from . 
import externals diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 0e200a56..ca2f7021 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -34,7 +34,7 @@ def grad_mu(self, z): pass @abstractmethod - def log_likelihood(self, y, y_hat): + def log_likelihood(self, y, y_hat, z=None): """Log L2-penalized likelihood.""" pass diff --git a/pyglmnet/metrics.py b/pyglmnet/metrics.py index 28c018cc..d97754bc 100644 --- a/pyglmnet/metrics.py +++ b/pyglmnet/metrics.py @@ -1,7 +1,6 @@ -""" A set of scoring functions. """ +"""A set of scoring functions.""" import numpy as np -from .pyglmnet import _logL from .distributions import Poisson, PoissonSoftplus, NegBinomialSoftplus @@ -24,8 +23,6 @@ def deviance(y, yhat, distr, theta): score : float Deviance of the predicted labels. """ - # if distr in ['softplus', 'poisson', 'neg-binomial']: - # LS = _logL(distr, y, y, theta=theta) if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): LS = distr.log_likelihood(y, y) else: @@ -59,19 +56,14 @@ def pseudo_R2(X, y, yhat, ynull_, distr, theta): score : float Pseudo-R2 score. """ - # if distr in ['softplus', 'poisson', 'neg-binomial']: - # LS = _logL(distr, y, y, theta=theta) if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): LS = distr.log_likelihood(y, y) else: LS = 0 - # L0 = _logL(distr, y, ynull_, theta=theta) - # L1 = _logL(distr, y, yhat, theta=theta) L0 = distr.log_likelihood(y, ynull_) L1 = distr.log_likelihood(y, yhat) - # if distr in ['softplus', 'poisson', 'neg-binomial']: if isinstance(distr, (Poisson, PoissonSoftplus, NegBinomialSoftplus)): score = (1 - (LS - L1) / (LS - L0)) else: diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 50d9dee4..8471181a 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -4,8 +4,6 @@ from copy import deepcopy import numpy as np -from scipy.special import expit, loggamma -from scipy.stats import norm from .utils import logger, set_log_level, _check_params, \ _verbose_iterable, _tqdm_log @@ -15,154 +13,12 @@ from .externals.sklearn.utils.validation import check_is_fitted from .distributions import BaseDistribution, Gaussian, Poisson, \ - PoissonSoftplus, NegBinomialSoftplus, Binomial, Probit, GammaSoftplus, \ - softplus + PoissonSoftplus, NegBinomialSoftplus, Binomial, Probit, GammaSoftplus ALLOWED_DISTRS = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma', 'neg-binomial'] -def _probit_g1(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) - res[np.abs(z) <= thresh] = np.log(cdfz[np.abs(z) <= thresh]) - res[z > thresh] = -pdfz[z > thresh] / z[z > thresh] - return res - - -def _probit_g2(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = pdfz[z < -thresh] / z[z < -thresh] - res[np.abs(z) <= thresh] = np.log(1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = np.log(pdfz[z > thresh] / z[z > thresh]) - return res - - -def _probit_g3(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = -z[z < -thresh] - res[np.abs(z) <= thresh] = \ - pdfz[np.abs(z) <= thresh] / cdfz[np.abs(z) <= thresh] - res[z > thresh] = pdfz[z > thresh] - return res - - -def _probit_g4(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = pdfz[z < -thresh] - res[np.abs(z) <= thresh] = \ - pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = z[z > thresh] - return res - - -def _probit_g5(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z 
< -thresh] = 0 * z[z < -thresh] - res[np.abs(z) <= thresh] = \ - z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ - cdfz[np.abs(z) <= thresh] + (pdfz[np.abs(z) <= thresh] / - cdfz[np.abs(z) <= thresh]) ** 2 - res[z > thresh] = z[z > thresh] * pdfz[z > thresh] + pdfz[z > thresh] ** 2 - return res - - -def _probit_g6(z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = \ - pdfz[z < -thresh] ** 2 - z[z < -thresh] * pdfz[z < -thresh] - res[np.abs(z) <= thresh] = \ - (pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh])) ** 2 - \ - z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ - (1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = 0 * z[z > thresh] - return res - - -def _z(beta0, beta, X, fit_intercept): - """Compute z to be passed through non-linearity.""" - if fit_intercept: - z = beta0 + np.dot(X, beta) - else: - z = np.dot(X, np.r_[beta0, beta]) - return z - - -def _lmb(distr, beta0, beta, X, eta, fit_intercept=True): - """Conditional intensity function.""" - z = _z(beta0, beta, X, fit_intercept) - return _mu(distr, z, eta, fit_intercept) - - -def _mu(distr, z, eta, fit_intercept): - """The non-linearity (inverse link).""" - if distr in ['softplus', 'gamma', 'neg-binomial']: - mu = softplus(z) - elif distr == 'poisson': - mu = z.copy() - beta0 = (1 - eta) * np.exp(eta) if fit_intercept else 0. - mu[z > eta] = z[z > eta] * np.exp(eta) + beta0 - mu[z <= eta] = np.exp(z[z <= eta]) - elif distr == 'gaussian': - mu = z - elif distr == 'binomial': - mu = expit(z) - elif distr == 'probit': - mu = norm.cdf(z) - return mu - - -def _grad_mu(distr, z, eta): - """Derivative of the non-linearity.""" - if distr in ['softplus', 'gamma', 'neg-binomial']: - grad_mu = expit(z) - elif distr == 'poisson': - grad_mu = z.copy() - grad_mu[z > eta] = np.ones_like(z)[z > eta] * np.exp(eta) - grad_mu[z <= eta] = np.exp(z[z <= eta]) - elif distr == 'gaussian': - grad_mu = np.ones_like(z) - elif distr == 'binomial': - grad_mu = expit(z) * (1 - expit(z)) - elif distr in 'probit': - grad_mu = norm.pdf(z) - return grad_mu - - -def _logL(distr, y, y_hat, z=None, theta=1.0): - """The log likelihood.""" - if distr in ['softplus', 'poisson']: - eps = np.spacing(1) - logL = np.sum(y * np.log(y_hat + eps) - y_hat) - elif distr == 'gaussian': - logL = -0.5 * np.sum((y - y_hat)**2) - elif distr == 'binomial': - - # prevents underflow - if z is not None: - logL = np.sum(y * z - np.log(1 + np.exp(z))) - # for scoring - else: - logL = np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) - elif distr == 'probit': - if z is not None: - pdfz, cdfz = norm.pdf(z), norm.cdf(z) - logL = np.sum(y * _probit_g1(z, pdfz, cdfz) + - (1 - y) * _probit_g2(z, pdfz, cdfz)) - else: - logL = np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) - elif distr == 'gamma': - # see - # https://www.statistics.ma.tum.de/fileadmin/w00bdb/www/czado/lec8.pdf - nu = 1. 
# shape parameter, exponential for now - logL = np.sum(nu * (-y / y_hat - np.log(y_hat))) - elif distr == 'neg-binomial': - logL = np.sum(loggamma(y + theta) - loggamma(theta) - loggamma(y + 1) + - theta * np.log(theta) + y * np.log(y_hat) - (theta + y) * - np.log(y_hat + theta)) - return logL - - def _penalty(alpha, beta, Tau, group): """The penalty.""" # Combine L1 and L2 penalty terms @@ -209,9 +65,15 @@ def _loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, group, beta, fit_intercept=True): """Define the objective function for elastic net.""" n_samples, n_features = X.shape - z = _z(beta[0], beta[1:], X, fit_intercept) - y_hat = _mu(distr, z, eta, fit_intercept) - L = 1. / n_samples * _logL(distr, y, y_hat, z, theta) + if fit_intercept: + z = distr._z(beta[0], beta[1:], X) + else: + z = distr._z(0., beta, X) + y_hat = distr.mu(z) + if isinstance(distr, (Binomial, Probit)): + L = 1. / n_samples * distr.log_likelihood(y, y_hat, z) + else: + L = 1. / n_samples * distr.log_likelihood(y, y_hat) if fit_intercept: P = _penalty(alpha, beta[1:], Tau, group) else: @@ -224,9 +86,15 @@ def _L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, group, beta, fit_intercept=True): """Define the objective function for elastic net.""" n_samples, n_features = X.shape - z = _z(beta[0], beta[1:], X, fit_intercept) - y_hat = _mu(distr, z, eta, fit_intercept) - L = 1. / n_samples * _logL(distr, y, y_hat, z, theta) + if fit_intercept: + z = distr._z(beta[0], beta[1:], X) + else: + z = distr._z(0., beta, X) + y_hat = distr.mu(z) + if isinstance(distr, (Binomial, Probit)): + L = 1. / n_samples * distr.log_likelihood(y, y_hat, z) + else: + L = 1. / n_samples * distr.log_likelihood(y, y_hat) if fit_intercept: P = 0.5 * (1 - alpha) * _L2penalty(beta[1:], Tau) else: @@ -248,10 +116,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, beta, Tau = np.eye(beta.shape[0]) InvCov = np.dot(Tau.T, Tau) - # z = _z(beta[0], beta[1:], X, fit_intercept) - # mu = _mu(distr, z, eta, fit_intercept) - # grad_mu = _grad_mu(distr, z, eta) - if fit_intercept: beta0_, beta_ = beta[0], beta[1:] else: @@ -259,45 +123,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, beta, grad_beta0, grad_beta = distr.grad_log_likelihood(X, y, beta0_, beta_) grad_beta0 = grad_beta0 if fit_intercept else 0. - # grad_beta0 = 0. - # if distr in ['poisson', 'softplus']: - # if fit_intercept: - # grad_beta0 = np.sum(grad_mu) - np.sum(y * grad_mu / mu) - # grad_beta = ((np.dot(grad_mu.T, X) - - # np.dot((y * grad_mu / mu).T, X)).T) - # - # elif distr == 'gaussian': - # if fit_intercept: - # grad_beta0 = np.sum((mu - y) * grad_mu) - # grad_beta = np.dot((mu - y).T, X * grad_mu[:, None]).T - # - # elif distr == 'binomial': - # if fit_intercept: - # grad_beta0 = np.sum(mu - y) - # grad_beta = np.dot((mu - y).T, X).T - # - # elif distr == 'probit': - # grad_logl = (y * _probit_g3(z, grad_mu, mu) - - # (1 - y) * _probit_g4(z, grad_mu, mu)) - # if fit_intercept: - # grad_beta0 = -np.sum(grad_logl) - # grad_beta = -np.dot(grad_logl.T, X).T - # - # elif distr == 'gamma': - # nu = 1. - # grad_logl = (y / mu ** 2 - 1 / mu) * grad_mu - # if fit_intercept: - # grad_beta0 = -nu * np.sum(grad_logl) - # grad_beta = -nu * np.dot(grad_logl.T, X).T - # - # elif distr == 'neg-binomial': - # partial_beta_0 = grad_mu * ((theta + y) / (mu + theta) - y / mu) - # - # if fit_intercept: - # grad_beta0 = np.sum(partial_beta_0) - # - # grad_beta = np.dot(partial_beta_0.T, X) - grad_beta0 *= 1. / n_samples grad_beta *= 1. 
/ n_samples @@ -312,104 +137,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, theta, beta, return g -def _gradhess_logloss_1d(distr, xk, y, z, eta, theta, fit_intercept=True): - """ - Compute gradient (1st derivative) - and Hessian (2nd derivative) - of log likelihood for a single coordinate. - - Parameters - ---------- - xk: float - (n_samples,) - y: float - (n_samples,) - z: float - (n_samples,) - - Returns - ------- - gk: gradient, float: - (n_features + 1,) - hk: float: - (n_features + 1,) - """ - n_samples = xk.shape[0] - - if distr == 'softplus': - mu = _mu(distr, z, eta, fit_intercept) - s = expit(z) - gk = np.sum(s * xk) - np.sum(y * s / mu * xk) - - grad_s = s * (1 - s) - grad_s_by_mu = grad_s / mu - s / (mu ** 2) - hk = np.sum(grad_s * xk ** 2) - np.sum(y * grad_s_by_mu * xk ** 2) - - elif distr == 'poisson': - mu = _mu(distr, z, eta, fit_intercept) - s = expit(z) - gk = np.sum((mu[z <= eta] - y[z <= eta]) * - xk[z <= eta]) + \ - np.exp(eta) * \ - np.sum((1 - y[z > eta] / mu[z > eta]) * - xk[z > eta]) - hk = np.sum(mu[z <= eta] * xk[z <= eta] ** 2) + \ - np.exp(eta) ** 2 * \ - np.sum(y[z > eta] / (mu[z > eta] ** 2) * - (xk[z > eta] ** 2)) - - elif distr == 'gaussian': - gk = np.sum((z - y) * xk) - hk = np.sum(xk * xk) - - elif distr == 'binomial': - mu = _mu(distr, z, eta, fit_intercept) - gk = np.sum((mu - y) * xk) - hk = np.sum(mu * (1.0 - mu) * xk * xk) - - elif distr == 'probit': - pdfz = norm.pdf(z) - cdfz = norm.cdf(z) - gk = -np.sum((y * _probit_g3(z, pdfz, cdfz) - - (1 - y) * _probit_g4(z, pdfz, cdfz)) * xk) - hk = np.sum((y * _probit_g5(z, pdfz, cdfz) + - (1 - y) * _probit_g6(z, pdfz, cdfz)) * (xk * xk)) - - elif distr == 'neg-binomial': - mu = _mu(distr, z, eta, fit_intercept) - grad_mu = _grad_mu(distr, z, eta) - hess_mu = np.exp(-z) * expit(z)**2 - - gradient_beta_j = -grad_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_1 = hess_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_2 = grad_mu**2 * \ - ((y + theta) / (mu + theta)**2 - y / mu**2) - partial_beta_0 = -(partial_beta_0_1 + partial_beta_0_2) - gk = np.dot(gradient_beta_j.T, xk) - hk = np.dot(partial_beta_0.T, xk**2) - - elif distr == 'gamma': - raise NotImplementedError('cdfast is not implemented for Gamma ' - 'distribution') - elif distr == 'neg-binomial': - mu = _mu(distr, z, eta, fit_intercept) - grad_mu = _grad_mu(distr, z, eta) - hess_mu = np.exp(-z) * expit(z) ** 2 - - gradient_beta_j = -grad_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_1 = hess_mu * (y / mu - (y + theta) / (mu + theta)) - partial_beta_0_2 = grad_mu ** 2 * \ - ((y + theta) / (mu + theta) ** 2 - y / mu ** 2) - partial_beta_0 = -(partial_beta_0_1 + partial_beta_0_2) - gk = np.dot(gradient_beta_j.T, xk) - hk = np.dot(partial_beta_0.T, xk ** 2) - - elif distr == 'gamma': - raise NotImplementedError('cdfast is not implemented for Gamma ' - 'distribution') - - return 1. / n_samples * gk, 1. 
/ n_samples * hk - def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, sample=False, theta=1.0, fit_intercept=True): @@ -791,7 +518,10 @@ def _cdfast(self, X, y, ActiveSet, beta, rl, fit_intercept=True): """ n_samples, n_features = X.shape reg_scale = rl * (1 - self.alpha) - z = _z(beta[0], beta[1:], X, fit_intercept) + if fit_intercept: + z = self.distr_._z(beta[0], beta[1:], X) + else: + z = self.distr_._z(0., beta, X) for k in range(0, n_features + int(fit_intercept)): # Only update parameters in active set if ActiveSet[k] != 0: @@ -1036,9 +766,6 @@ def predict(self, X): raise ValueError('Input data should be of type ndarray (got %s).' % type(X)) - # yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, - # fit_intercept=True) - z = self.distr_._z(self.beta0_, self.beta_, X) yhat = self.distr_.mu(z) @@ -1070,8 +797,6 @@ def _predict_proba(self, X): raise ValueError('Input data should be of type ndarray (got %s).' % type(X)) - # yhat = _lmb(self.distr, self.beta0_, self.beta_, X, self.eta, - # fit_intercept=True) z = self.distr_._z(self.beta0_, self.beta_, X) yhat = self.distr_.mu(z) yhat = np.asarray(yhat) diff --git a/pyglmnet/utils.py b/pyglmnet/utils.py index 82df0482..6f9046c0 100644 --- a/pyglmnet/utils.py +++ b/pyglmnet/utils.py @@ -1,12 +1,9 @@ -""" -A few miscellaneous helper functions for pyglmnet.py -""" +"""A few miscellaneous helper functions for pyglmnet.""" import numpy as np from copy import copy import logging -from .distributions import BaseDistribution logger = logging.getLogger('pyglmnet') logger.addHandler(logging.StreamHandler()) diff --git a/tests/test_distributions.py b/tests/test_distributions.py index d9c0c1c6..ef4fe3c4 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -61,7 +61,7 @@ def test_gradients(distr): glm._set_distr() y = simulate_glm(glm.distr, beta_[0], beta_[1:], X) - func = partial(_L2loss, distr, glm.alpha, + func = partial(_L2loss, glm.distr_, glm.alpha, glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group) grad = partial(_grad_L2loss, glm.distr_, glm.alpha, glm.Tau, reg_lambda, X, y, diff --git a/tests/test_pyglmnet.py b/tests/test_pyglmnet.py index 490a495b..7f10e689 100644 --- a/tests/test_pyglmnet.py +++ b/tests/test_pyglmnet.py @@ -172,10 +172,13 @@ def test_glmnet(distr, reg_lambda, fit_intercept, solver): group = None Tau = None + glm_ = GLM(distr=distr) + glm_._set_distr() + def callback(beta): Tau = None loss_trace.append( - _loss(distr, alpha, Tau, reg_lambda, + _loss(glm_.distr_, alpha, Tau, reg_lambda, X_train, y_train, eta, theta, group, beta, fit_intercept=fit_intercept)) @@ -194,7 +197,7 @@ def callback(beta): # true loss and beta should be recovered when reg_lambda == 0 if reg_lambda == 0.: # verify loss at convergence = loss when beta=beta_ - l_true = _loss(distr, alpha, Tau, reg_lambda, + l_true = _loss(glm_.distr_, alpha, Tau, reg_lambda, X_train, y_train, eta, theta, group, np.concatenate(([beta0], beta))) assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5) From c495dfc86e15f1f7d6c0288d01f25c9e96c4ccd6 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 23:31:45 -0700 Subject: [PATCH 18/24] remove more comments --- pyglmnet/pyglmnet.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 8471181a..4eae1d29 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -788,7 +788,6 @@ def _predict_proba(self, X): The predicted targets of shape (n_samples,). 
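Note on PATCH 17 above: with the legacy _mu/_grad_mu/_logL string-dispatch helpers removed, every call site goes through a distribution object, so the same code path serves both the built-in distributions and user-defined subclasses of BaseDistribution. A small illustrative snippet under that assumption, using the Gaussian class from this series (the arrays are made-up data):

    import numpy as np
    from pyglmnet.distributions import Gaussian

    distr = Gaussian()
    y = np.array([0.5, 1.5, 2.0])
    z = np.array([0.6, 1.4, 2.1])        # linear predictor beta0 + X @ beta
    y_hat = distr.mu(z)                  # identity inverse link for Gaussian
    ll = distr.log_likelihood(y, y_hat)  # -0.5 * sum((y - y_hat) ** 2)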
""" - # if self.distr not in ['binomial', 'probit']: if not isinstance(self.distr_, (Binomial, Probit)): raise ValueError('This is only applicable for \ the binomial distribution.') @@ -824,7 +823,6 @@ def predict_proba(self, X): X = check_array(X, accept_sparse=False) check_is_fitted(self, 'is_fitted_') - # if self.distr in ['binomial', 'probit']: if isinstance(self.distr_, (Binomial, Probit)): return self._predict_proba(X) else: @@ -880,7 +878,6 @@ def score(self, X, y): # For f1 as well if self.score_metric in ['accuracy']: - # if self.distr not in ['binomial', 'multinomial']: if not isinstance(self.distr_, (Binomial, Probit)): raise ValueError(self.score_metric + ' is only defined for binomial ' + @@ -888,7 +885,6 @@ def score(self, X, y): y = np.asarray(y).ravel() - # if self.distr in ['binomial', 'probit'] and \ if isinstance(self.distr_, (Binomial, Probit)) and \ self.score_metric != 'accuracy': yhat = self.predict_proba(X) From be4cd0b564a5055ca818571ae326f2f297842607 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Tue, 18 Aug 2020 23:55:36 -0700 Subject: [PATCH 19/24] simulate methods for distribution classes --- pyglmnet/distributions.py | 61 +++++++++++++++++++++++++++++++++++++++ pyglmnet/pyglmnet.py | 22 +------------- 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index ca2f7021..527ef889 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -5,6 +5,8 @@ from scipy.special import expit, log1p, loggamma from scipy.stats import norm +from .externals.sklearn.utils import check_random_state + def softplus(z): """Numerically stable version of log(1 + exp(z)).""" @@ -48,6 +50,11 @@ def gradhess_log_likelihood_1d(self): msg = (f"""Gradients and Hessians of 1-d log likelihood are not specified for {self.__class__} distribution""") # noqa raise NotImplementedError(msg) + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + msg = (f"""Simulation of model is not implemented for {self.__class__} distribution""") # noqa + raise NotImplementedError(msg) + def _z(self, beta0, beta, X): """Compute z to be passed through non-linearity.""" return beta0 + np.dot(X, beta) @@ -86,6 +93,15 @@ def gradhess_log_likelihood_1d(self, xk, y, z): hk = np.sum(xk * xk) return gk, hk + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.normal(mu) + class Poisson(BaseDistribution): """Class for Poisson distribution.""" @@ -141,6 +157,15 @@ def gradhess_log_likelihood_1d(self, xk, y, z): (xk[z > self.eta] ** 2)) return gk, hk + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.poisson(mu) + class PoissonSoftplus(Poisson): """Class for Poisson distribution with softplus inverse link.""" @@ -219,6 +244,16 @@ def gradhess_log_likelihood_1d(self, xk, y, z): hk = np.dot(partial_beta_0.T, xk ** 2) return gk, hk + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = 
check_random_state(random_state) + p = theta / (theta + mu) # Probability of success + y = _random_state.negative_binomial(theta, p) + class Binomial(BaseDistribution): """Class for binomial distribution.""" @@ -259,6 +294,15 @@ def gradhess_log_likelihood_1d(self, xk, y, z): hk = np.sum(mu * (1.0 - mu) * xk * xk) return gk, hk + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.binomial(1, mu) + class Probit(BaseDistribution): """Class for probit distribution.""" @@ -354,6 +398,15 @@ def gradhess_log_likelihood_1d(self, xk, y, z): (1 - y) * self._probit_g6(z, pdfz, cdfz)) * (xk * xk)) return gk, hk + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + _random_state = check_random_state(random_state) + return _random_state.binomial(1, mu) + class GammaSoftplus(BaseDistribution): """Class for Gamma distribution with softplus inverse link.""" @@ -391,3 +444,11 @@ def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" raise NotImplementedError('cdfast is not implemented for Gamma ' 'distribution') + + def simulate(self, beta0, beta, X, random_state, sample): + """Simulate targets y given model beta0, beta, X.""" + mu = self.mu(self._z(beta0, beta, X)) + if not sample: + return mu + else: + return np.exp(mu) diff --git a/pyglmnet/pyglmnet.py b/pyglmnet/pyglmnet.py index 4eae1d29..b3fe8b40 100644 --- a/pyglmnet/pyglmnet.py +++ b/pyglmnet/pyglmnet.py @@ -184,27 +184,7 @@ def simulate_glm(distr, beta0, beta, X, eta=2.0, random_state=None, if not fit_intercept: beta0 = 0. - mu = glm.distr_.mu(glm.distr_._z(beta0, beta, X)) - - if not sample: - return mu - - _random_state = check_random_state(random_state) - if isinstance(distr, (PoissonSoftplus, Poisson)): - y = _random_state.poisson(mu) - - if isinstance(distr, Gaussian): - y = _random_state.normal(mu) - - if isinstance(distr, (Binomial, Probit)): - y = _random_state.binomial(1, mu) - - if isinstance(distr, GammaSoftplus): - y = np.exp(mu) - - if isinstance(distr, NegBinomialSoftplus): - p = theta / (theta + mu) # Probability of success - y = _random_state.negative_binomial(theta, p) + y = glm.distr_.simulate(beta0, beta, X, random_state, sample) return y From b2a9cced0b8aaebffbd4b199b73481e7a2318cd2 Mon Sep 17 00:00:00 2001 From: Pavan Ramkumar Date: Wed, 19 Aug 2020 00:17:30 -0700 Subject: [PATCH 20/24] flake8 --- examples/plot_custom_distributions.py | 2 +- pyglmnet/distributions.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/plot_custom_distributions.py b/examples/plot_custom_distributions.py index edab74ff..68f2cd52 100644 --- a/examples/plot_custom_distributions.py +++ b/examples/plot_custom_distributions.py @@ -33,7 +33,7 @@ # - mu: inverse link function # - grad_mu: gradient of the inverse link function # - log_likelihood: the log likelihood function -# - grad_log_likelihood: the gradient of the log +# - grad_log_likelihood: the gradient of the log # likelihood. 
# All distributions in pyglmnet inherit from BaseDistribution # diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 527ef889..62708ac6 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -145,7 +145,6 @@ def grad_log_likelihood(self, X, y, beta0, beta): def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" mu = self.mu(z) - s = expit(z) gk = np.sum((mu[z <= self.eta] - y[z <= self.eta]) * xk[z <= self.eta]) + \ np.exp(self.eta) * \ @@ -251,8 +250,8 @@ def simulate(self, beta0, beta, X, random_state, sample): return mu else: _random_state = check_random_state(random_state) - p = theta / (theta + mu) # Probability of success - y = _random_state.negative_binomial(theta, p) + p = self.theta / (self.theta + mu) # Probability of success + return _random_state.negative_binomial(self.theta, p) class Binomial(BaseDistribution): From 179e1c6ab3a039783b5bdc42b3dac07544193a29 Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Wed, 19 Aug 2020 12:44:40 -0400 Subject: [PATCH 21/24] Flake8 and style --- examples/plot_custom_distributions.py | 4 +- pyglmnet/distributions.py | 112 +++++++++--------- .../externals/sklearn/utils/validation.py | 2 +- 3 files changed, 62 insertions(+), 56 deletions(-) diff --git a/examples/plot_custom_distributions.py b/examples/plot_custom_distributions.py index 68f2cd52..a13d38f6 100644 --- a/examples/plot_custom_distributions.py +++ b/examples/plot_custom_distributions.py @@ -41,8 +41,8 @@ # the existing Binomial distribution and override mu and grad_mu # if we want to use the cloglog link function. -import numpy as np -from pyglmnet.distributions import Binomial +import numpy as np # noqa: E402 +from pyglmnet.distributions import Binomial # noqa: E402 class CustomBinomial(Binomial): diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 62708ac6..a6357778 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -303,60 +303,66 @@ def simulate(self, beta0, beta, X, random_state, sample): return _random_state.binomial(1, mu) +def _probit_g1(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) + res[np.abs(z) <= thresh] = np.log(cdfz[np.abs(z) <= thresh]) + res[z > thresh] = -pdfz[z > thresh] / z[z > thresh] + return res + + +def _probit_g2(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = pdfz[z < -thresh] / z[z < -thresh] + res[np.abs(z) <= thresh] = np.log(1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = np.log(pdfz[z > thresh] / z[z > thresh]) + return res + + +def _probit_g3(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = -z[z < -thresh] + res[np.abs(z) <= thresh] = \ + pdfz[np.abs(z) <= thresh] / cdfz[np.abs(z) <= thresh] + res[z > thresh] = pdfz[z > thresh] + return res + + +def _probit_g4(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = pdfz[z < -thresh] + res[np.abs(z) <= thresh] = \ + pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = z[z > thresh] + return res + + +def _probit_g5(z, pdfz, cdfz, thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = 0 * z[z < -thresh] + res[np.abs(z) <= thresh] = \ + z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ + cdfz[np.abs(z) <= thresh] + (pdfz[np.abs(z) <= thresh] / + cdfz[np.abs(z) <= thresh]) ** 2 + res[z > thresh] = z[z > thresh] * pdfz[z > thresh] + pdfz[z > thresh] ** 2 + return res + + +def _probit_g6(z, pdfz, cdfz, 
thresh=5): + res = np.zeros_like(z) + res[z < -thresh] = \ + pdfz[z < -thresh] ** 2 - z[z < -thresh] * pdfz[z < -thresh] + res[np.abs(z) <= thresh] = \ + (pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh])) ** 2 - \ + z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ + (1 - cdfz[np.abs(z) <= thresh]) + res[z > thresh] = 0 * z[z > thresh] + return res + + class Probit(BaseDistribution): """Class for probit distribution.""" - def _probit_g1(self, z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = np.log(-pdfz[z < -thresh] / z[z < -thresh]) - res[np.abs(z) <= thresh] = np.log(cdfz[np.abs(z) <= thresh]) - res[z > thresh] = -pdfz[z > thresh] / z[z > thresh] - return res - - def _probit_g2(self, z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = pdfz[z < -thresh] / z[z < -thresh] - res[np.abs(z) <= thresh] = np.log(1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = np.log(pdfz[z > thresh] / z[z > thresh]) - return res - - def _probit_g3(self, z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = -z[z < -thresh] - res[np.abs(z) <= thresh] = \ - pdfz[np.abs(z) <= thresh] / cdfz[np.abs(z) <= thresh] - res[z > thresh] = pdfz[z > thresh] - return res - - def _probit_g4(self, z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = pdfz[z < -thresh] - res[np.abs(z) <= thresh] = \ - pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = z[z > thresh] - return res - - def _probit_g5(self, z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = 0 * z[z < -thresh] - res[np.abs(z) <= thresh] = \ - z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ - cdfz[np.abs(z) <= thresh] + (pdfz[np.abs(z) <= thresh] / - cdfz[np.abs(z) <= thresh]) ** 2 - res[z > thresh] = z[z > thresh] * pdfz[z > thresh] + pdfz[z > thresh] ** 2 - return res - - def _probit_g6(self, z, pdfz, cdfz, thresh=5): - res = np.zeros_like(z) - res[z < -thresh] = \ - pdfz[z < -thresh] ** 2 - z[z < -thresh] * pdfz[z < -thresh] - res[np.abs(z) <= thresh] = \ - (pdfz[np.abs(z) <= thresh] / (1 - cdfz[np.abs(z) <= thresh])) ** 2 - \ - z[np.abs(z) <= thresh] * pdfz[np.abs(z) <= thresh] / \ - (1 - cdfz[np.abs(z) <= thresh]) - res[z > thresh] = 0 * z[z > thresh] - return res - def mu(self, z): """Inverse link function.""" mu = norm.cdf(z) @@ -372,8 +378,8 @@ def log_likelihood(self, y, y_hat, z=None): if z is not None: pdfz, cdfz = norm.pdf(z), norm.cdf(z) log_likelihood = \ - np.sum(y * self._probit_g1(z, pdfz, cdfz) + - (1 - y) * self._probit_g2(z, pdfz, cdfz)) + np.sum(y * _probit_g1(z, pdfz, cdfz) + + (1 - y) * _probit_g2(z, pdfz, cdfz)) else: log_likelihood = \ np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) diff --git a/pyglmnet/externals/sklearn/utils/validation.py b/pyglmnet/externals/sklearn/utils/validation.py index 7cda34e4..3e09c504 100644 --- a/pyglmnet/externals/sklearn/utils/validation.py +++ b/pyglmnet/externals/sklearn/utils/validation.py @@ -119,7 +119,7 @@ def check_consistent_length(*arrays): uniques = np.unique(lengths) if len(uniques) > 1: raise ValueError("Found input variables with inconsistent numbers of" - " samples: %r" % [int(l) for l in lengths]) + " samples: %r" % [int(ll) for ll in lengths]) def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, From 615fcf90275f8c1a3614d068531c9fedb270e2d5 Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Wed, 19 Aug 2020 13:14:48 -0400 Subject: [PATCH 22/24] DOC update api and refactor softplus --- doc/api.rst | 30 +++++++++++++++++++----- 
pyglmnet/distributions.py | 49 +++++++++++++-------------------------- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index fd0f5047..9d5b1f0b 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -6,14 +6,32 @@ API Documentation .. currentmodule:: pyglmnet -Classes -======= +GLM Classes +=========== -.. autoclass:: GLM - :members: +.. currentmodule:: pyglmnet + +.. autosummary:: + :toctree: generated/ + + GLM + GLMCV + +Distribution Classes +==================== + +.. currentmodule:: pyglmnet.distributions + +.. autosummary:: + :toctree: generated/ -.. autoclass:: GLMCV - :members: + BaseDistribution + Poisson + PoissonSoftplus + NegBinomialSoftplus + Binomial + Probit + GammaSoftplus Datasets diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index a6357778..988a7ca6 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -60,6 +60,19 @@ def _z(self, beta0, beta, X): return beta0 + np.dot(X, beta) +class SoftplusLink: + + def mu(self, z): + """Inverse link function.""" + mu = softplus(z) + return mu + + def grad_mu(self, z): + """Gradient of inverse link.""" + grad_mu = expit(z) + return grad_mu + + class Gaussian(BaseDistribution): """Class for Gaussian distribution.""" @@ -166,19 +179,9 @@ def simulate(self, beta0, beta, X, random_state, sample): return _random_state.poisson(mu) -class PoissonSoftplus(Poisson): +class PoissonSoftplus(Poisson, SoftplusLink): """Class for Poisson distribution with softplus inverse link.""" - def mu(self, z): - """Inverse link function.""" - mu = softplus(z) - return mu - - def grad_mu(self, z): - """Gradient of inverse link.""" - grad_mu = expit(z) - return grad_mu - def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" mu = self.mu(z) @@ -191,23 +194,13 @@ def gradhess_log_likelihood_1d(self, xk, y, z): return gk, hk -class NegBinomialSoftplus(BaseDistribution): +class NegBinomialSoftplus(BaseDistribution, SoftplusLink): """Class for Negative binomial distribution with softplus inverse link.""" def __init__(self): """init.""" self.theta = None - def mu(self, z): - """Inverse link function.""" - mu = softplus(z) - return mu - - def grad_mu(self, z): - """Gradient of inverse link.""" - grad_mu = expit(z) - return grad_mu - def log_likelihood(self, y, y_hat): """Log L2-penalized likelihood.""" theta = self.theta @@ -413,19 +406,9 @@ def simulate(self, beta0, beta, X, random_state, sample): return _random_state.binomial(1, mu) -class GammaSoftplus(BaseDistribution): +class GammaSoftplus(BaseDistribution, SoftplusLink): """Class for Gamma distribution with softplus inverse link.""" - def mu(self, z): - """Inverse link function.""" - mu = softplus(z) - return mu - - def grad_mu(self, z): - """Gradient of inverse link.""" - grad_mu = expit(z) - return grad_mu - def log_likelihood(self, y, y_hat, z=None): """Log L2-penalized likelihood.""" # see From 9bdab7638565220005ee672fe708884934c44f9a Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Wed, 19 Aug 2020 13:16:34 -0400 Subject: [PATCH 23/24] FIX probit --- pyglmnet/distributions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 988a7ca6..9f87b9fd 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -390,10 +390,10 @@ def gradhess_log_likelihood_1d(self, xk, y, z): """One-dimensional Gradient and Hessian of log likelihood.""" pdfz = norm.pdf(z) cdfz = norm.cdf(z) - gk = -np.sum((y * 
self._probit_g3(z, pdfz, cdfz) - - (1 - y) * self._probit_g4(z, pdfz, cdfz)) * xk) - hk = np.sum((y * self._probit_g5(z, pdfz, cdfz) + - (1 - y) * self._probit_g6(z, pdfz, cdfz)) * (xk * xk)) + gk = -np.sum((y * _probit_g3(z, pdfz, cdfz) - + (1 - y) * _probit_g4(z, pdfz, cdfz)) * xk) + hk = np.sum((y * _probit_g5(z, pdfz, cdfz) + + (1 - y) * _probit_g6(z, pdfz, cdfz)) * (xk * xk)) return gk, hk def simulate(self, beta0, beta, X, random_state, sample): From 081fa04820881a0588731ce5588a75bc9eaee52c Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Wed, 19 Aug 2020 13:30:14 -0400 Subject: [PATCH 24/24] TST make basedistribution happy --- pyglmnet/distributions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyglmnet/distributions.py b/pyglmnet/distributions.py index 9f87b9fd..ffdc7279 100644 --- a/pyglmnet/distributions.py +++ b/pyglmnet/distributions.py @@ -179,7 +179,7 @@ def simulate(self, beta0, beta, X, random_state, sample): return _random_state.poisson(mu) -class PoissonSoftplus(Poisson, SoftplusLink): +class PoissonSoftplus(SoftplusLink, Poisson): """Class for Poisson distribution with softplus inverse link.""" def gradhess_log_likelihood_1d(self, xk, y, z): @@ -194,7 +194,7 @@ def gradhess_log_likelihood_1d(self, xk, y, z): return gk, hk -class NegBinomialSoftplus(BaseDistribution, SoftplusLink): +class NegBinomialSoftplus(SoftplusLink, BaseDistribution): """Class for Negative binomial distribution with softplus inverse link.""" def __init__(self): @@ -406,7 +406,7 @@ def simulate(self, beta0, beta, X, random_state, sample): return _random_state.binomial(1, mu) -class GammaSoftplus(BaseDistribution, SoftplusLink): +class GammaSoftplus(SoftplusLink, BaseDistribution): """Class for Gamma distribution with softplus inverse link.""" def log_likelihood(self, y, y_hat, z=None):
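Note on PATCH 24 above: the base-class reordering matters because of Python's method resolution order. With PoissonSoftplus(Poisson, SoftplusLink), the concrete Poisson.mu (the exponential-based inverse link) is found before the mixin's softplus version; with NegBinomialSoftplus(BaseDistribution, SoftplusLink), the abstract BaseDistribution.mu is found first, so the class cannot even be instantiated. Listing SoftplusLink first puts its concrete mu and grad_mu ahead in the MRO. A self-contained sketch of the same pitfall with stand-in classes:

    class Base:
        def mu(self, z):
            return 'exp link'

    class Mixin:
        def mu(self, z):
            return 'softplus link'

    class Wrong(Base, Mixin):   # mixin is shadowed by Base
        pass

    class Right(Mixin, Base):   # mixin wins, as in PATCH 24
        pass

    print(Wrong().mu(0.0))  # -> 'exp link'
    print(Right().mu(0.0))  # -> 'softplus link'
    print([c.__name__ for c in Right.__mro__])  # Right, Mixin, Base, object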