Commit 3572abe

Merge pull request #636 from henrydingliu/master

Adding a test for drop_valuation

2 parents: ae858c2 + 3c185b1

6 files changed: +76 −27 lines
chainladder/development/barnzehn.py
Lines changed: 2 additions & 2 deletions

@@ -75,7 +75,7 @@ def fit(self, X, y=None, sample_weight=None):
         self.model_ = DevelopmentML(Pipeline(steps=[
             ('design_matrix', PatsyFormula(self.formula)),
             ('model', LinearRegression(fit_intercept=False))]),
-            y_ml=response, fit_incrementals=False, feat_eng=self.feat_eng, drop=self.drop, drop_valuation=self.drop_valuation, weighted_step='model').fit(tri)
+            y_ml=response, fit_incrementals=True, feat_eng=self.feat_eng, drop=self.drop, drop_valuation=self.drop_valuation, weighted_step='model').fit(X=tri, sample_weight=sample_weight)
         resid = tri - self.model_.triangle_ml_[
             self.model_.triangle_ml_.valuation <= tri.valuation_date]
         self.mse_resid_ = (resid**2).sum(0).sum(1).sum(2).sum() / (

@@ -100,7 +100,7 @@ def transform(self, X):
         X_new : New triangle with transformed attributes.
         """
         X_new = X.copy()
-        X_ml, weight_ml = self.model_._prep_X_ml(X.cum_to_incr().log())
+        X_ml = self.model_._prep_X_ml(X.cum_to_incr().log())
         y_ml = self.model_.estimator_ml.predict(X_ml)
         triangle_ml, predicted_data = self.model_._get_triangle_ml(X_ml, y_ml)
         backend = "cupy" if X.array_backend == "cupy" else "numpy"
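After this change, BarnettZehnwirth.fit passes its sample_weight argument through to the weighted 'model' step of the underlying DevelopmentML pipeline (and fits incrementals directly), so a weight Triangle supplied at fit time is honored rather than ignored. A minimal usage sketch (not from the diff; the unit-weight triangle mirrors the genins/genins idiom used in the new tests):

    import chainladder as cl

    abc = cl.load_sample('abc')
    weights = abc / abc  # unit weight per observed cell; any weight Triangle works

    bz = cl.BarnettZehnwirth(formula='C(origin) + C(development)')
    bz.fit(abc, sample_weight=weights)
    print(bz.triangle_ml_)  # fitted triangle from the weighted regression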

chainladder/development/glm.py
Lines changed: 16 additions & 9 deletions

@@ -22,15 +22,16 @@ class TweedieGLM(DevelopmentBase):
 
     Parameters
     ----------
+    drop: tuple or list of tuples
+        Drops specific origin/development combination(s)
+    drop_valuation: str or list of str (default = None)
+        Drops specific valuation periods. str must be date convertible.
     design_matrix: formula-like
         A patsy formula describing the independent variables, X of the GLM
     response: str
         Column name for the response variable of the GLM. If omitted, then the
         first column of the Triangle will be used.
-    weight: str
-        Column name of any weight to use in the GLM. If none specified, then an
-        unweighted regression will be performed.
-    power: float, default=0
+    power: float, default=1
         The power determines the underlying target distribution according
         to the following table:
         +-------+------------------------+

@@ -52,7 +53,7 @@ class TweedieGLM(DevelopmentBase):
         regularization strength. ``alpha = 0`` is equivalent to unpenalized
         GLMs. In this case, the design matrix `X` must have full column rank
         (no collinearities).
-    link: {'auto', 'identity', 'log'}, default='auto'
+    link: {'auto', 'identity', 'log'}, default='log'
         The link function of the GLM, i.e. mapping from linear predictor
         `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets
         the link depending on the chosen family as follows:

@@ -78,10 +79,11 @@ class TweedieGLM(DevelopmentBase):
     """
 
     def __init__(self, design_matrix='C(development) + C(origin)',
-                 response=None, weight=None, power=1.0, alpha=1.0, link='log',
-                 max_iter=100, tol=0.0001, warm_start=False, verbose=0):
+                 response=None, power=1.0, alpha=1.0, link='log',
+                 max_iter=100, tol=0.0001, warm_start=False, verbose=0, drop=None, drop_valuation=None):
+        self.drop = drop
+        self.drop_valuation = drop_valuation
         self.response = response
-        self.weight = weight
         self.design_matrix = design_matrix
         self.power = power
         self.alpha = alpha

@@ -93,13 +95,18 @@ def __init__(self, design_matrix='C(development) + C(origin)',
 
     def fit(self, X, y=None, sample_weight=None):
         response = X.columns[0] if not self.response else self.response
+        if sample_weight is None:
+            weight = None
+        else:
+            weight = 'model'
         self.model_ = DevelopmentML(Pipeline(steps=[
             ('design_matrix', PatsyFormula(self.design_matrix)),
             ('model', TweedieRegressor(
                 link=self.link, power=self.power, max_iter=self.max_iter,
                 tol=self.tol, warm_start=self.warm_start,
                 verbose=self.verbose, fit_intercept=False))]),
-            y_ml=response, weight_ml=self.weight).fit(X)
+            y_ml=response, weighted_step=weight,
+            drop=self.drop, drop_valuation=self.drop_valuation).fit(X=X, sample_weight=sample_weight)
         return self
 
     @property
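The net effect for users: TweedieGLM loses the old weight column argument and instead takes drop/drop_valuation at construction plus an optional weight Triangle at fit time, which switches the underlying 'model' step into weighted mode. A minimal sketch (not from the diff; the 2005 valuation drop is an illustrative choice):

    import chainladder as cl

    genins = cl.load_sample('genins')

    dev = cl.TweedieGLM(
        design_matrix='C(development) + C(origin)',
        drop_valuation='2005',  # zero-weights every cell valued in 2005
    )
    transformed = dev.fit_transform(genins, sample_weight=genins / genins)
    print(cl.Chainladder().fit(transformed).ultimate_)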

chainladder/development/learning.py
Lines changed: 18 additions & 12 deletions

@@ -33,6 +33,8 @@ class DevelopmentML(DevelopmentBase):
         Time Series aspects of the model. Predictions from one development period
         get used as features in the next development period. Lags should be negative
         integers.
+    weighted_step: str
+        Step name within estimator_ml that is weighted
     drop: tuple or list of tuples
         Drops specific origin/development combination(s)
     drop_valuation: str or list of str (default = None)

@@ -56,8 +58,7 @@ def test_func(df)
             return df['origin'] + 1
         )
     fit_incrementals:
-        Whether the response variable should be converted to an incremental basis
-        for fitting.
+        Whether the response variable should be converted to an incremental basis for fitting.
 
     Attributes
     ----------

@@ -70,10 +71,9 @@ def test_func(df)
     """
 
     def __init__(self, estimator_ml=None, y_ml=None, autoregressive=False,
-                 weight_ml=None, weighted_step=None, drop=None, drop_valuation=None, fit_incrementals=True, feat_eng=None):
+                 weighted_step=None, drop=None, drop_valuation=None, fit_incrementals=True, feat_eng=None):
         self.estimator_ml = estimator_ml
         self.y_ml = y_ml
-        self.weight_ml = weight_ml
         self.weighted_step = weighted_step
         self.autoregressive = autoregressive
         self.drop = drop

@@ -168,7 +168,7 @@ def _prep_X_ml(self, X):
         df_base = X.incr_to_cum().to_frame(
             keepdims=True, implicit_axis=True, origin_as_datetime=True
         ).reset_index().iloc[:, :-1]
-        df = df_base.merge(X.cum_to_incr().to_frame(
+        df = df_base.merge(X_.to_frame(
             keepdims=True, implicit_axis=True, origin_as_datetime=True
         ).reset_index(), how='left',
             on=list(df_base.columns)).fillna(0)

@@ -177,13 +177,18 @@ def _prep_X_ml(self, X):
         if self.feat_eng is not None:
             for key, item in self.feat_eng.items():
                 df[key] = item['func'](df=df, **item['kwargs'])
+        return df
+
+    def _prep_w_ml(self, X, sample_weight=None):
         weight_base = (~np.isnan(X.values)).astype(float)
-        weight = weight_base.copy()
+        weight = weight_base.copy()
         if self.drop is not None:
             weight = weight * self._drop_func(X)
         if self.drop_valuation is not None:
-            weight = weight * self._drop_valuation_func(X)
-        return df, weight.flatten()[weight_base.flatten()>0]
+            weight = weight * self._drop_valuation_func(X)
+        if sample_weight is not None:
+            weight = weight * sample_weight.values
+        return weight.flatten()[weight_base.flatten()>0]

@@ -194,8 +199,8 @@ def fit(self, X, y=None, sample_weight=None):
             Set of LDFs to which the estimator will be applied.
         y : None
             Ignored, use y_ml to set a response variable for the ML algorithm
-        sample_weight : None
-            Ignored
+        sample_weight : Triangle-like
+            Weights to use in the regression
 
         Returns
         -------

@@ -214,8 +219,9 @@ def fit(self, X, y=None, sample_weight=None):
         self.valuation_encoder_ = dict(zip(
             val,
             (pd.Series(val).rank()-1)/{'Y': 1, 'S': 2, 'Q': 4, 'M': 12}[X.development_grain]))
-        df, weight = self._prep_X_ml(X)
+        df = self._prep_X_ml(X)
         self.df_ = df
+        weight = self._prep_w_ml(X, sample_weight)
         self.weight_ = weight
         if self.weighted_step == None:
             sample_weights = {}

@@ -249,7 +255,7 @@ def transform(self, X):
         X_new : New triangle with transformed attributes.
         """
         X_new = X.copy()
-        X_ml, weight_ml = self._prep_X_ml(X)
+        X_ml = self._prep_X_ml(X)
         y_ml = self.estimator_ml.predict(X_ml)
         triangle_ml, predicted_data = self._get_triangle_ml(X_ml, y_ml)
         backend = "cupy" if X.array_backend == "cupy" else "numpy"
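The refactor splits weight preparation out of _prep_X_ml into a dedicated _prep_w_ml: the observed-cell mask, the drop/drop_valuation masks, and any user sample_weight all compose by elementwise multiplication, then flatten down to the rows _prep_X_ml emitted. A standalone numpy sketch of that composition (toy data, not the library code):

    import numpy as np

    values = np.array([[1.0, 2.0, np.nan],
                       [3.0, np.nan, np.nan]])       # toy triangle values
    weight_base = (~np.isnan(values)).astype(float)  # 1 where a cell is observed
    drop_mask = np.array([[1.0, 0.0, 1.0],
                          [1.0, 1.0, 1.0]])          # e.g. one dropped valuation cell
    sample_weight = np.full_like(values, 2.0)        # user-supplied weights

    weight = weight_base * drop_mask * sample_weight
    flat = weight.flatten()[weight_base.flatten() > 0]  # keep only observed cells
    print(flat)  # [2. 0. 2.] -- a dropped cell gets weight 0 but stays in the design matrix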

chainladder/development/tests/test_barnzehn.py
Lines changed: 15 additions & 4 deletions

@@ -1,9 +1,9 @@
 import numpy as np
 import chainladder as cl
 import pytest
+abc = cl.load_sample('abc')
 
 def test_basic_bz():
-    abc = cl.load_sample('abc')
     assert np.all(
         np.around(cl.BarnettZehnwirth(formula='C(origin)+C(development)').fit(abc).coef_.T.values, 3).flatten()
         == np.array([11.837, 0.179, 0.345, 0.378, 0.405, 0.427, 0.431, 0.66, 0.963, 1.157, 1.278, 0.251, -0.056, -0.449, -0.829, -1.169, -1.508, -1.798, -2.023, -2.238, -2.428])

@@ -21,7 +21,6 @@ def test_feat_eng_1():
     def test_func(df):
         return df["development"]
 
-    abc = cl.load_sample('abc')
     test_dict = {'testfeat': {'func': test_func, 'kwargs': {}}}
 
     assert np.all(

@@ -38,18 +37,30 @@ def test_feat_eng_2():
     def origin_onehot(df, ori):
         return [1 if x == ori else 0 for x in df["origin"]]
 
-    abc = cl.load_sample('abc')
     feat_dict = {f'origin_{x}': {'func': origin_onehot, 'kwargs': {'ori': float(x+1)}} for x in range(10)}
     assert np.all(
         np.around(cl.BarnettZehnwirth(formula='+'.join([f'C({x})' for x in feat_dict.keys()]), feat_eng=feat_dict).fit(abc).ldf_.values, 3)
         == np.around(cl.BarnettZehnwirth(formula='C(origin)').fit_transform(abc).ldf_.values, 3)
     )
 
+def test_drops():
+    '''
+    this function tests passing in a basic drop_valuation
+    '''
+    def test_func(df):
+        return df["development"]
+
+    test_dict = {'testfeat': {'func': test_func, 'kwargs': {}}}
+
+    assert np.all(
+        np.around(cl.BarnettZehnwirth(formula='C(development)', drop_valuation='1979').fit(abc).triangle_ml_.values, 3)
+        == np.around(cl.BarnettZehnwirth(formula='C(testfeat)', drop=[('1977', 36), ('1978', 24), ('1979', 12)], feat_eng=test_dict).fit(abc).triangle_ml_.values, 3)
+    )
+
 def test_bz_2008():
     '''
     this function tests the drop parameter by recreating the example in the 2008 BZ paper, section 4.1
     '''
-    abc = cl.load_sample('abc')
     exposure = np.array([[2.2], [2.4], [2.2], [2.0], [1.9], [1.6], [1.6], [1.8], [2.2], [2.5], [2.6]])
     abc_adj = abc/exposure

chainladder/development/tests/test_glm.py
Lines changed: 6 additions & 0 deletions

@@ -5,3 +5,9 @@ def test_basic_odp_cl(genins):
         (cl.Chainladder().fit(genins).ultimate_ -
          cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins)).ultimate_) /
         genins.latest_diagonal).max() < 1e-2
+
+def test_sample_weight(genins):
+    assert abs(
+        (cl.Chainladder().fit(genins).ultimate_ -
+         cl.Chainladder().fit(cl.TweedieGLM().fit_transform(genins, sample_weight=genins/genins)).ultimate_) /
+        genins.latest_diagonal).max() < 1e-2
chainladder/development/tests/test_learning.py (new file)
Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
+import chainladder as cl
+from sklearn.linear_model import LinearRegression
+from sklearn.pipeline import Pipeline
+from chainladder.utils.utility_functions import PatsyFormula
+
+def test_incremental(genins):
+    response = [genins.columns[0]]
+    model = cl.DevelopmentML(Pipeline(steps=[
+        ('design_matrix', PatsyFormula('C(development)')),
+        ('model', LinearRegression(fit_intercept=False))]),
+        y_ml=response, fit_incrementals=False).fit(genins)
+    assert abs(model.triangle_ml_.loc[:, :, '2010', :] - genins.mean()).max() < 1e2
+
+def test_misc(genins):
+    model = cl.DevelopmentML(Pipeline(steps=[
+        ('design_matrix', PatsyFormula('C(development)')),
+        ('model', LinearRegression(fit_intercept=False))]),
+        weighted_step=['model'], fit_incrementals=False).fit(genins, sample_weight=genins/genins)
+    assert abs(model.triangle_ml_.loc[:, :, '2010', :] - genins.mean()).max() < 1e2
