Skip to content

Commit 134e2c5

Browse files
V0.18.4 (#631)
* v0.18.4 * lint and add a test
1 parent 0fb9430 commit 134e2c5

File tree

12 files changed

+167
-62
lines changed

12 files changed

+167
-62
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
### Changelogs
22

3+
### 0.18.4
4+
- fixed confidence intervals in cumulative hazards for parametric univariate models. They were previously
5+
severely depressed.
6+
- adding left-truncation support to parametric univariate models with the `entry` kwarg in `.fit`
7+
38
### 0.18.3
49
- Some performance improvements to parametric univariate models.
510
- Suppressing some irrelevant NumPy and autograd warnings, so lifeline warnings are more noticeable.

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
#
6161
# The short X.Y version.
6262

63-
version = "0.18.3"
63+
version = "0.18.4"
6464
# The full version, including dev info
6565
release = version
6666

docs/jupyter_notebooks/Modelling time-lagged conversion rates.ipynb

Lines changed: 21 additions & 21 deletions
Large diffs are not rendered by default.

lifelines/fitters/__init__.py

Lines changed: 64 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,8 @@ class ParametericUnivariateFitter(UnivariateFitter):
222222
223223
"""
224224

225-
_MIN_PARAMETER_VALUE = 0.000001
225+
_KNOWN_MODEL = False
226+
_MIN_PARAMETER_VALUE = 1e-09
226227

227228
def __init__(self, *args, **kwargs):
228229
super(ParametericUnivariateFitter, self).__init__(*args, **kwargs)
@@ -232,49 +233,54 @@ def __init__(self, *args, **kwargs):
232233
# pylint: disable=no-value-for-parameter,unexpected-keyword-arg
233234
self._hazard = egrad(self._cumulative_hazard, argnum=1)
234235
if not hasattr(self, "_bounds"):
235-
self._bounds = [(self._MIN_PARAMETER_VALUE, None)] * len(self._fitted_parameter_names)
236+
self._bounds = [(0, None)] * len(self._fitted_parameter_names)
237+
self._bounds = list(self._buffer_bounds(self._bounds))
238+
236239
if not hasattr(self, "_initial_values"):
237240
self._initial_values = np.array(list(self._initial_values_from_bounds()))
238241

239242
if "alpha" in self._fitted_parameter_names:
240243
raise NameError("'alpha' in _fitted_parameter_names is a lifelines reserved word. Try 'alpha_' instead.")
241244

242-
def _check_cumulative_hazard_is_monotone_and_positive(self, durations):
245+
if len(self._bounds) != len(self._fitted_parameter_names) != self._initial_values.shape[0]:
246+
raise ValueError(
247+
"_bounds must be the same shape as _fitted_parameters_names must be the same shape as _initial_values"
248+
)
249+
250+
def _check_cumulative_hazard_is_monotone_and_positive(self, durations, values):
243251
class_name = self.__class__.__name__
244252

245-
cumulative_hazard = self._cumulative_hazard(self._initial_values, durations)
253+
cumulative_hazard = self._cumulative_hazard(values, durations)
246254
if not np.all(cumulative_hazard > 0):
247255
warnings.warn(
248256
dedent(
249257
"""\
250258
Cumulative hazard is not strictly positive. For example, try:
251259
252-
>>> test_times = np.linspace(0.01, 100, 15)
253260
>>> fitter = {0}()
254-
>>> fitter._cumulative_hazard(fitter._initial_values, test_times)
261+
>>> fitter._cumulative_hazard(np.{1}, np.sort(durations))
255262
256263
This may harm convergence, or return nonsensical results.
257264
""".format(
258-
class_name
265+
class_name, values.__repr__()
259266
)
260267
),
261268
StatisticalWarning,
262269
)
263270

264-
derivative_of_cumulative_hazard = self._hazard(self._initial_values, durations)
271+
derivative_of_cumulative_hazard = self._hazard(values, durations)
265272
if not np.all(derivative_of_cumulative_hazard >= 0):
266273
warnings.warn(
267274
dedent(
268275
"""\
269276
Cumulative hazard is not strictly non-decreasing. For example, try:
270277
271-
>>> test_times = np.linspace(0.01, 100, 15)
272278
>>> fitter = {0}()
273-
>>> fitter._hazard(fitter._initial_values, test_times)
279+
>>> fitter._hazard({1}, np.sort(durations))
274280
275281
This may harm convergence, or return nonsensical results.
276282
""".format(
277-
class_name
283+
class_name, values.__repr__()
278284
)
279285
),
280286
StatisticalWarning,
@@ -291,18 +297,33 @@ def _initial_values_from_bounds(self):
291297
else:
292298
yield (ub - lb) / 2
293299

300+
def _buffer_bounds(self, bounds):
301+
for (lb, ub) in bounds:
302+
if lb is None and ub is None:
303+
yield (None, None)
304+
elif lb is None:
305+
yield (None, self._MIN_PARAMETER_VALUE)
306+
elif ub is None:
307+
yield (self._MIN_PARAMETER_VALUE, None)
308+
else:
309+
yield (lb + self._MIN_PARAMETER_VALUE, ub - self._MIN_PARAMETER_VALUE)
310+
294311
def _cumulative_hazard(self, params, times):
295312
raise NotImplementedError
296313

297314
def _survival_function(self, params, times):
298315
return anp.exp(-self._cumulative_hazard(params, times))
299316

300-
def _negative_log_likelihood(self, params, T, E):
317+
def _negative_log_likelihood(self, params, T, E, entry):
301318
n = T.shape[0]
302319
hz = self._hazard(params, T[E])
303320
hz = anp.clip(hz, 1e-18, np.inf)
304321

305-
ll = (anp.log(hz)).sum() - self._cumulative_hazard(params, T).sum()
322+
ll = (
323+
(anp.log(hz)).sum()
324+
- self._cumulative_hazard(params, T).sum()
325+
+ self._cumulative_hazard(params, entry).sum()
326+
)
306327
return -ll / n
307328

308329
def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
@@ -315,7 +336,7 @@ def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
315336
)
316337

317338
gradient_at_times = np.vstack(
318-
[gradient_of_cum_hazard_at_mle(basis)[1] for basis in np.eye(len(self._fitted_parameters_))]
339+
[gradient_of_cum_hazard_at_mle(basis) for basis in np.eye(len(self._fitted_parameters_))]
319340
)
320341

321342
std_cumulative_hazard = np.sqrt(
@@ -325,13 +346,13 @@ def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
325346
if ci_labels is None:
326347
ci_labels = ["%s_upper_%.2f" % (self._label, alpha), "%s_lower_%.2f" % (self._label, alpha)]
327348
assert len(ci_labels) == 2, "ci_labels should be a length 2 array."
328-
329349
df[ci_labels[0]] = self.cumulative_hazard_at_times(self.timeline) + alpha2 * std_cumulative_hazard
330350
df[ci_labels[1]] = self.cumulative_hazard_at_times(self.timeline) - alpha2 * std_cumulative_hazard
331351
return df
332352

333-
def _fit_model(self, T, E, show_progress=True):
353+
def _fit_model(self, T, E, entry, show_progress=True):
334354

355+
non_zero_entries = entry[entry > 0]
335356
with warnings.catch_warnings():
336357
warnings.simplefilter("ignore")
337358

@@ -340,14 +361,14 @@ def _fit_model(self, T, E, show_progress=True):
340361
self._initial_values,
341362
jac=True,
342363
method="L-BFGS-B",
343-
args=(T, E),
364+
args=(T, E, non_zero_entries),
344365
bounds=self._bounds,
345366
options={"disp": show_progress},
346367
)
347368

348369
if results.success:
349370
# pylint: disable=no-value-for-parameter
350-
hessian_ = hessian(self._negative_log_likelihood)(results.x, T, E)
371+
hessian_ = hessian(self._negative_log_likelihood)(results.x, T, E, non_zero_entries)
351372
return results.x, -results.fun, hessian_ * T.shape[0]
352373
print(results)
353374
raise ConvergenceError(
@@ -449,7 +470,15 @@ def print_summary(self, decimals=2, **kwargs):
449470
print(df.to_string(float_format=format_floats(decimals), formatters={"p": format_p_value(decimals)}))
450471

451472
def fit(
452-
self, durations, event_observed=None, timeline=None, label=None, alpha=None, ci_labels=None, show_progress=False
473+
self,
474+
durations,
475+
event_observed=None,
476+
timeline=None,
477+
label=None,
478+
alpha=None,
479+
ci_labels=None,
480+
show_progress=False,
481+
entry=None,
453482
): # pylint: disable=too-many-arguments
454483
"""
455484
Parameters
@@ -471,6 +500,9 @@ def fit(
471500
as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>
472501
show_progress: boolean, optional
473502
since this is an iterative fitting algorithm, switching this to True will display some iteration details.
503+
entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is
504+
useful for left-truncated (not left-censored) observations. If None, all members of the population
505+
entered study when they were "born": time zero.
474506
475507
Returns
476508
-------
@@ -491,12 +523,15 @@ def fit(
491523
"This model does not allow for non-positive durations. Suggestion: add a small positive value to zero elements."
492524
)
493525

494-
self._check_cumulative_hazard_is_monotone_and_positive(self.durations)
526+
if not self._KNOWN_MODEL:
527+
self._check_cumulative_hazard_is_monotone_and_positive(self.durations, self._initial_values)
495528

496529
self.event_observed = (
497530
np.asarray(event_observed, dtype=int) if event_observed is not None else np.ones_like(self.durations)
498531
)
499532

533+
self.entry = np.asarray(entry) if entry is not None else np.zeros_like(self.durations)
534+
500535
if timeline is not None:
501536
self.timeline = np.sort(np.asarray(timeline))
502537
else:
@@ -507,9 +542,12 @@ def fit(
507542

508543
# estimation
509544
self._fitted_parameters_, self._log_likelihood, self._hessian_ = self._fit_model(
510-
self.durations, self.event_observed.astype(bool), show_progress=show_progress
545+
self.durations, self.event_observed.astype(bool), self.entry, show_progress=show_progress
511546
)
512547

548+
if not self._KNOWN_MODEL:
549+
self._check_cumulative_hazard_is_monotone_and_positive(self.durations, self._fitted_parameters_)
550+
513551
for param_name, fitted_value in zip(self._fitted_parameter_names, self._fitted_parameters_):
514552
setattr(self, param_name, fitted_value)
515553

@@ -561,3 +599,8 @@ def hazard_at_times(self, times):
561599
@_must_call_fit_first
562600
def median_(self):
563601
return median_survival_times(self.survival_function_)
602+
603+
604+
class KnownModelParametericUnivariateFitter(ParametericUnivariateFitter):
605+
606+
_KNOWN_MODEL = True

lifelines/fitters/exponential_fitter.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import print_function
33
import numpy as np
4-
from lifelines.fitters import ParametericUnivariateFitter
4+
from lifelines.fitters import KnownModelParametericUnivariateFitter
55

66

7-
class ExponentialFitter(ParametericUnivariateFitter):
7+
class ExponentialFitter(KnownModelParametericUnivariateFitter):
88
r"""
99
This class implements an Exponential model for univariate data. The model has parameterized
1010
form:
@@ -36,7 +36,8 @@ class ExponentialFitter(ParametericUnivariateFitter):
3636
def median_(self):
3737
return 1.0 / self.lambda_ * (np.log(2))
3838

39-
def _fit_model(self, T, E, show_progress=False):
39+
def _fit_model(self, T, E, entry, show_progress=False):
40+
T = T - entry
4041
lambda_ = E.sum() / T.sum()
4142
lambda_variance_ = lambda_ / T.sum()
4243
log_likelihood = np.log(lambda_) * E.sum() - lambda_ * T.sum()

lifelines/fitters/log_logistic_fitter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
from __future__ import print_function, division
33
import autograd.numpy as np
44

5-
from lifelines.fitters import ParametericUnivariateFitter
5+
from lifelines.fitters import KnownModelParametericUnivariateFitter
66

77

8-
class LogLogisticFitter(ParametericUnivariateFitter):
8+
class LogLogisticFitter(KnownModelParametericUnivariateFitter):
99

1010
r"""
1111
This class implements a Log-Logistic model for univariate data. The model has parameterized

lifelines/fitters/log_normal_fitter.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
import autograd.numpy as np
55
from autograd.scipy.stats import norm
66

7-
from lifelines.fitters import ParametericUnivariateFitter
7+
from lifelines.fitters import KnownModelParametericUnivariateFitter
88

99

10-
class LogNormalFitter(ParametericUnivariateFitter):
10+
class LogNormalFitter(KnownModelParametericUnivariateFitter):
1111
r"""
1212
This class implements an Log Normal model for univariate data. The model has parameterized
1313
form:
@@ -28,7 +28,7 @@ class LogNormalFitter(ParametericUnivariateFitter):
2828
"""
2929

3030
_fitted_parameter_names = ["mu_", "sigma_"]
31-
_bounds = [(None, None), (ParametericUnivariateFitter._MIN_PARAMETER_VALUE, None)]
31+
_bounds = [(None, None), (0, None)]
3232

3333
@property
3434
def median_(self):

lifelines/fitters/piecewise_exponential_fitter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
from __future__ import print_function, division
33
import autograd.numpy as np
44

5-
from lifelines.fitters import ParametericUnivariateFitter
5+
from lifelines.fitters import KnownModelParametericUnivariateFitter
66

77

8-
class PiecewiseExponentialFitter(ParametericUnivariateFitter):
8+
class PiecewiseExponentialFitter(KnownModelParametericUnivariateFitter):
99
r"""
1010
This class implements an Piecewise Exponential model for univariate data. The model has parameterized
1111
hazard rate:

lifelines/fitters/weibull_fitter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
from __future__ import print_function, division
33
import autograd.numpy as np
44

5-
from lifelines.fitters import ParametericUnivariateFitter
5+
from lifelines.fitters import KnownModelParametericUnivariateFitter
66

77

8-
class WeibullFitter(ParametericUnivariateFitter):
8+
class WeibullFitter(KnownModelParametericUnivariateFitter):
99

1010
r"""
1111
This class implements a Weibull model for univariate data. The model has parameterized

lifelines/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import unicode_literals
33

4-
__version__ = "0.18.3"
4+
__version__ = "0.18.4"

0 commit comments

Comments
 (0)