-
-
Notifications
You must be signed in to change notification settings - Fork 262
/
Copy pathregression.py
192 lines (165 loc) · 6.14 KB
/
regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
from typing import Optional
import dask.array as da
import numpy as np
import sklearn.metrics
from dask import is_dask_collection
from dask.utils import derived_from
from .._typing import ArrayLike
def _check_sample_weight(sample_weight: Optional[ArrayLike]):
if sample_weight is not None:
raise ValueError("'sample_weight' is not supported.")
def _check_reg_targets(
y_true: ArrayLike, y_pred: ArrayLike, multioutput: Optional[str]
):
if multioutput is not None and (
is_dask_collection(multioutput) or multioutput != "uniform_average"
):
raise NotImplementedError("'multioutput' must be 'uniform_average'")
if y_true.ndim == 1:
y_true = y_true.reshape((-1, 1))
if y_pred.ndim == 1:
y_pred = y_pred.reshape((-1, 1))
# TODO: y_type, multioutput
return None, y_true, y_pred, multioutput
@derived_from(sklearn.metrics)
def mean_squared_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    multioutput: Optional[str] = "uniform_average",
    squared: bool = True,
    compute: bool = True,
) -> ArrayLike:
    """Dask-specific notes: ``sample_weight`` and array-like ``multioutput``
    weights are rejected with ``ValueError``. With ``squared=False`` the
    square root is applied to the returned value, including the per-output
    ``'raw_values'`` result. ``compute=False`` returns the result without
    calling ``.compute()`` on it.
    """
    _check_sample_weight(sample_weight)
    # Per-output mean of squared residuals (reduction over the sample axis).
    output_errors = ((y_pred - y_true) ** 2).mean(axis=0)

    if multioutput is not None and not isinstance(multioutput, str):
        raise ValueError("Weighted 'multioutput' not supported.")

    if multioutput == "raw_values":
        result = output_errors
    else:
        # Any other string, or None, is treated as a uniform average.
        result = output_errors.mean()

    # Applied on both paths: previously 'raw_values' returned early and
    # silently ignored ``squared=False``.
    if not squared:
        result = da.sqrt(result)
    if compute:
        result = result.compute()
    return result
@derived_from(sklearn.metrics)
def mean_absolute_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    multioutput: Optional[str] = "uniform_average",
    compute: bool = True,
) -> ArrayLike:
    """Dask-specific notes: ``sample_weight`` and array-like ``multioutput``
    weights are rejected with ``ValueError``. ``compute=False`` returns the
    result without calling ``.compute()`` on it.
    """
    _check_sample_weight(sample_weight)
    # Per-output mean of absolute residuals (reduction over the sample axis).
    per_output = abs(y_pred - y_true).mean(axis=0)

    if multioutput is not None and not isinstance(multioutput, str):
        raise ValueError("Weighted 'multioutput' not supported.")

    if multioutput == "raw_values":
        return per_output.compute() if compute else per_output

    # Any other string, or None, is treated as a uniform average.
    averaged = per_output.mean()
    return averaged.compute() if compute else averaged
def mean_absolute_percentage_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    multioutput: Optional[str] = "uniform_average",
    compute: bool = True,
) -> ArrayLike:
    """Mean absolute percentage error regression loss.

    Note here that we do not represent the output as a percentage in range
    [0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in
    https://scikit-learn.org/stable/modules/model_evaluation.html#mean-absolute-percentage-error

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.
    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. Not supported: any value other than ``None`` raises
        ``ValueError``.
    multioutput : {'raw_values', 'uniform_average'} or None
        Defines aggregating of multiple output values.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.
        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

        Array-like weights are not supported and raise ``ValueError``.
    compute : bool
        Whether to compute this result (default ``True``)

    Returns
    -------
    loss : float or array-like of floats in the range [0, 1/eps]
        If multioutput is 'raw_values', then mean absolute percentage error
        is returned for each output separately.
        If multioutput is 'uniform_average' or ``None``, then the
        equally-weighted average of all output errors is returned.

        MAPE output is non-negative floating point. The best value is 0.0.
        But note the fact that bad predictions can lead to arbitrarily large
        MAPE values, especially if some y_true values are very close to zero.
        Note that we return a large value instead of `inf` when y_true is zero.

    Raises
    ------
    ValueError
        If ``sample_weight`` is given, or ``multioutput`` is neither a string
        nor ``None``.
    """
    _check_sample_weight(sample_weight)
    epsilon = np.finfo(np.float64).eps
    # Divide by |y_true|, floored at eps. Flooring the *signed* value would
    # replace every negative target with eps and blow the error up.
    mape = abs(y_pred - y_true) / da.maximum(abs(y_true), epsilon)
    output_errors = mape.mean(axis=0)

    if multioutput is not None and not isinstance(multioutput, str):
        raise ValueError("Weighted 'multioutput' not supported.")

    if multioutput == "raw_values":
        return output_errors.compute() if compute else output_errors

    # Any other string, or None, is treated as a uniform average.
    result = output_errors.mean()
    if compute:
        result = result.compute()
    return result
@derived_from(sklearn.metrics)
def r2_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    multioutput: Optional[str] = "uniform_average",
    compute: bool = True,
) -> ArrayLike:
    """Dask-specific notes: ``sample_weight`` is rejected with ``ValueError``
    and ``multioutput`` must be ``'uniform_average'`` or ``None``.
    ``compute=False`` returns the result without calling ``.compute()``.
    """
    _check_sample_weight(sample_weight)
    _, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, multioutput)

    # Constant unit weight, since sample weights are rejected above.
    unit_weight = 1.0
    residuals = y_true - y_pred
    deviations = y_true - y_true.mean(axis=0)
    ss_res = (unit_weight * residuals ** 2).sum(axis=0, dtype="f8")
    ss_tot = (unit_weight * deviations ** 2).sum(axis=0, dtype="f8")

    # Per output: zero residual -> 1.0; otherwise zero total variance -> 0.0;
    # otherwise the usual 1 - SS_res / SS_tot.
    ratio_score = da.where(ss_tot != 0, 1 - ss_res / ss_tot, 0.0)
    per_output = da.where(ss_res == 0, 1.0, ratio_score)

    averaged = per_output.mean(axis=0)
    return averaged.compute() if compute else averaged
@derived_from(sklearn.metrics)
def mean_squared_log_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    multioutput: Optional[str] = "uniform_average",
    compute: bool = True,
) -> ArrayLike:
    """Dask-specific notes: evaluated as ``mean_squared_error`` applied to
    ``log1p`` of both inputs; ``sample_weight``, ``multioutput`` and
    ``compute`` are forwarded unchanged.
    """
    log_true = np.log1p(y_true)
    log_pred = np.log1p(y_pred)
    return mean_squared_error(
        log_true,
        log_pred,
        sample_weight=sample_weight,
        multioutput=multioutput,
        compute=compute,
    )