@@ -222,7 +222,8 @@ class ParametericUnivariateFitter(UnivariateFitter):
222222
223223 """
224224
225- _MIN_PARAMETER_VALUE = 0.000001
225+ _KNOWN_MODEL = False
226+ _MIN_PARAMETER_VALUE = 1e-09
226227
227228 def __init__ (self , * args , ** kwargs ):
228229 super (ParametericUnivariateFitter , self ).__init__ (* args , ** kwargs )
@@ -232,49 +233,54 @@ def __init__(self, *args, **kwargs):
232233 # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
233234 self ._hazard = egrad (self ._cumulative_hazard , argnum = 1 )
234235 if not hasattr (self , "_bounds" ):
235- self ._bounds = [(self ._MIN_PARAMETER_VALUE , None )] * len (self ._fitted_parameter_names )
236+ self ._bounds = [(0 , None )] * len (self ._fitted_parameter_names )
237+ self ._bounds = list (self ._buffer_bounds (self ._bounds ))
238+
236239 if not hasattr (self , "_initial_values" ):
237240 self ._initial_values = np .array (list (self ._initial_values_from_bounds ()))
238241
239242 if "alpha" in self ._fitted_parameter_names :
240243 raise NameError ("'alpha' in _fitted_parameter_names is a lifelines reserved word. Try 'alpha_' instead." )
241244
242- def _check_cumulative_hazard_is_monotone_and_positive (self , durations ):
245+ if len (self ._bounds ) != len (self ._fitted_parameter_names ) != self ._initial_values .shape [0 ]:
246+ raise ValueError (
247+ "_bounds must be the same shape as _fitted_parameters_names must be the same shape as _initial_values"
248+ )
249+
250+ def _check_cumulative_hazard_is_monotone_and_positive (self , durations , values ):
243251 class_name = self .__class__ .__name__
244252
245- cumulative_hazard = self ._cumulative_hazard (self . _initial_values , durations )
253+ cumulative_hazard = self ._cumulative_hazard (values , durations )
246254 if not np .all (cumulative_hazard > 0 ):
247255 warnings .warn (
248256 dedent (
249257 """\
250258 Cumulative hazard is not strictly positive. For example, try:
251259
252- >>> test_times = np.linspace(0.01, 100, 15)
253260 >>> fitter = {0}()
254- >>> fitter._cumulative_hazard(fitter._initial_values, test_times )
261+ >>> fitter._cumulative_hazard(np.{1}, np.sort(durations) )
255262
256263 This may harm convergence, or return nonsensical results.
257264 """ .format (
258- class_name
265+ class_name , values . __repr__ ()
259266 )
260267 ),
261268 StatisticalWarning ,
262269 )
263270
264- derivative_of_cumulative_hazard = self ._hazard (self . _initial_values , durations )
271+ derivative_of_cumulative_hazard = self ._hazard (values , durations )
265272 if not np .all (derivative_of_cumulative_hazard >= 0 ):
266273 warnings .warn (
267274 dedent (
268275 """\
269276 Cumulative hazard is not strictly non-decreasing. For example, try:
270277
271- >>> test_times = np.linspace(0.01, 100, 15)
272278 >>> fitter = {0}()
273- >>> fitter._hazard(fitter._initial_values, test_times )
279+ >>> fitter._hazard({1}, np.sort(durations) )
274280
275281 This may harm convergence, or return nonsensical results.
276282 """ .format (
277- class_name
283+ class_name , values . __repr__ ()
278284 )
279285 ),
280286 StatisticalWarning ,
@@ -291,18 +297,33 @@ def _initial_values_from_bounds(self):
291297 else :
292298 yield (ub - lb ) / 2
293299
300+ def _buffer_bounds (self , bounds ):
301+ for (lb , ub ) in bounds :
302+ if lb is None and ub is None :
303+ yield (None , None )
304+ elif lb is None :
305+ yield (None , self ._MIN_PARAMETER_VALUE )
306+ elif ub is None :
307+ yield (self ._MIN_PARAMETER_VALUE , None )
308+ else :
309+ yield (lb + self ._MIN_PARAMETER_VALUE , ub - self ._MIN_PARAMETER_VALUE )
310+
294311 def _cumulative_hazard (self , params , times ):
295312 raise NotImplementedError
296313
297314 def _survival_function (self , params , times ):
298315 return anp .exp (- self ._cumulative_hazard (params , times ))
299316
300- def _negative_log_likelihood (self , params , T , E ):
317+ def _negative_log_likelihood (self , params , T , E , entry ):
301318 n = T .shape [0 ]
302319 hz = self ._hazard (params , T [E ])
303320 hz = anp .clip (hz , 1e-18 , np .inf )
304321
305- ll = (anp .log (hz )).sum () - self ._cumulative_hazard (params , T ).sum ()
322+ ll = (
323+ (anp .log (hz )).sum ()
324+ - self ._cumulative_hazard (params , T ).sum ()
325+ + self ._cumulative_hazard (params , entry ).sum ()
326+ )
306327 return - ll / n
307328
308329 def _compute_confidence_bounds_of_cumulative_hazard (self , alpha , ci_labels ):
@@ -315,7 +336,7 @@ def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
315336 )
316337
317338 gradient_at_times = np .vstack (
318- [gradient_of_cum_hazard_at_mle (basis )[ 1 ] for basis in np .eye (len (self ._fitted_parameters_ ))]
339+ [gradient_of_cum_hazard_at_mle (basis ) for basis in np .eye (len (self ._fitted_parameters_ ))]
319340 )
320341
321342 std_cumulative_hazard = np .sqrt (
@@ -325,13 +346,13 @@ def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
325346 if ci_labels is None :
326347 ci_labels = ["%s_upper_%.2f" % (self ._label , alpha ), "%s_lower_%.2f" % (self ._label , alpha )]
327348 assert len (ci_labels ) == 2 , "ci_labels should be a length 2 array."
328-
329349 df [ci_labels [0 ]] = self .cumulative_hazard_at_times (self .timeline ) + alpha2 * std_cumulative_hazard
330350 df [ci_labels [1 ]] = self .cumulative_hazard_at_times (self .timeline ) - alpha2 * std_cumulative_hazard
331351 return df
332352
333- def _fit_model (self , T , E , show_progress = True ):
353+ def _fit_model (self , T , E , entry , show_progress = True ):
334354
355+ non_zero_entries = entry [entry > 0 ]
335356 with warnings .catch_warnings ():
336357 warnings .simplefilter ("ignore" )
337358
@@ -340,14 +361,14 @@ def _fit_model(self, T, E, show_progress=True):
340361 self ._initial_values ,
341362 jac = True ,
342363 method = "L-BFGS-B" ,
343- args = (T , E ),
364+ args = (T , E , non_zero_entries ),
344365 bounds = self ._bounds ,
345366 options = {"disp" : show_progress },
346367 )
347368
348369 if results .success :
349370 # pylint: disable=no-value-for-parameter
350- hessian_ = hessian (self ._negative_log_likelihood )(results .x , T , E )
371+ hessian_ = hessian (self ._negative_log_likelihood )(results .x , T , E , non_zero_entries )
351372 return results .x , - results .fun , hessian_ * T .shape [0 ]
352373 print (results )
353374 raise ConvergenceError (
@@ -449,7 +470,15 @@ def print_summary(self, decimals=2, **kwargs):
449470 print (df .to_string (float_format = format_floats (decimals ), formatters = {"p" : format_p_value (decimals )}))
450471
451472 def fit (
452- self , durations , event_observed = None , timeline = None , label = None , alpha = None , ci_labels = None , show_progress = False
473+ self ,
474+ durations ,
475+ event_observed = None ,
476+ timeline = None ,
477+ label = None ,
478+ alpha = None ,
479+ ci_labels = None ,
480+ show_progress = False ,
481+ entry = None ,
453482 ): # pylint: disable=too-many-arguments
454483 """
455484 Parameters
@@ -471,6 +500,9 @@ def fit(
471500 as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>
472501 show_progress: boolean, optional
473502 since this is an iterative fitting algorithm, switching this to True will display some iteration details.
503+ entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is
504+ useful for left-truncated (not left-censored) observations. If None, all members of the population
505+ entered study when they were "born": time zero.
474506
475507 Returns
476508 -------
@@ -491,12 +523,15 @@ def fit(
491523 "This model does not allow for non-positive durations. Suggestion: add a small positive value to zero elements."
492524 )
493525
494- self ._check_cumulative_hazard_is_monotone_and_positive (self .durations )
526+ if not self ._KNOWN_MODEL :
527+ self ._check_cumulative_hazard_is_monotone_and_positive (self .durations , self ._initial_values )
495528
496529 self .event_observed = (
497530 np .asarray (event_observed , dtype = int ) if event_observed is not None else np .ones_like (self .durations )
498531 )
499532
533+ self .entry = np .asarray (entry ) if entry is not None else np .zeros_like (self .durations )
534+
500535 if timeline is not None :
501536 self .timeline = np .sort (np .asarray (timeline ))
502537 else :
@@ -507,9 +542,12 @@ def fit(
507542
508543 # estimation
509544 self ._fitted_parameters_ , self ._log_likelihood , self ._hessian_ = self ._fit_model (
510- self .durations , self .event_observed .astype (bool ), show_progress = show_progress
545+ self .durations , self .event_observed .astype (bool ), self . entry , show_progress = show_progress
511546 )
512547
548+ if not self ._KNOWN_MODEL :
549+ self ._check_cumulative_hazard_is_monotone_and_positive (self .durations , self ._fitted_parameters_ )
550+
513551 for param_name , fitted_value in zip (self ._fitted_parameter_names , self ._fitted_parameters_ ):
514552 setattr (self , param_name , fitted_value )
515553
@@ -561,3 +599,8 @@ def hazard_at_times(self, times):
561599 @_must_call_fit_first
562600 def median_ (self ):
563601 return median_survival_times (self .survival_function_ )
602+
603+
604+ class KnownModelParametericUnivariateFitter (ParametericUnivariateFitter ):
605+
606+ _KNOWN_MODEL = True
0 commit comments